linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* move bd_mutex to the gendisk (resend)
@ 2021-05-12  6:18 Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
                   ` (7 more replies)
  0 siblings, 8 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-12  6:18 UTC (permalink / raw)
  To: Jens Axboe, Song Liu
  Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

Hi all,

this series first cleans up gendisk allocation in the md driver to remove
the ERESTARTSYS hack in blkdev_get, then further refactors blkdev_get
and then finally moves bd_mutex into the gendisk as having separate locks
for the whole device vs partitions just complicates locking in places that
add an remove partitions a lot.

Diffstat:
 Documentation/filesystems/locking.rst |    2 
 block/genhd.c                         |   59 +++------
 block/ioctl.c                         |    2 
 block/partitions/core.c               |   45 +++----
 drivers/block/loop.c                  |   14 +-
 drivers/block/xen-blkfront.c          |    8 -
 drivers/block/zram/zram_drv.c         |   18 +--
 drivers/block/zram/zram_drv.h         |    2 
 drivers/md/md.h                       |    6 -
 drivers/s390/block/dasd_genhd.c       |    8 -
 drivers/scsi/sd.c                     |    4 
 fs/block_dev.c                        |  204 ++++++++++++++++------------------
 fs/btrfs/volumes.c                    |    2 
 fs/super.c                            |    8 -
 include/linux/blk_types.h             |    4 
 include/linux/genhd.h                 |    6 -
 init/do_mounts.c                      |   10 -
 17 files changed, 184 insertions(+), 218 deletions(-)

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/8] block: split __blkdev_get
  2021-05-12  6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
@ 2021-05-12  6:18 ` Christoph Hellwig
  2021-05-19  9:21   ` Ming Lei
  2021-05-12  6:18 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-12  6:18 UTC (permalink / raw)
  To: Jens Axboe, Song Liu
  Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

Split __blkdev_get into one helper for the whole device, and one for
opening partitions.  This removes the (bounded) recursion when opening
a partition.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/block_dev.c | 115 +++++++++++++++++++++++--------------------------
 1 file changed, 55 insertions(+), 60 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index b8abccd03e5d..d053510d2f6a 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1287,76 +1287,68 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
  */
 EXPORT_SYMBOL_GPL(bdev_disk_changed);
 
-/*
- * bd_mutex locking:
- *
- *  mutex_lock(part->bd_mutex)
- *    mutex_lock_nested(whole->bd_mutex, 1)
- */
-static int __blkdev_get(struct block_device *bdev, fmode_t mode)
+static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	int ret = 0;
 
-	if (!bdev->bd_openers) {
-		if (!bdev_is_partition(bdev)) {
-			ret = 0;
-			if (disk->fops->open)
-				ret = disk->fops->open(bdev, mode);
-
-			if (!ret)
-				set_init_blocksize(bdev);
-
-			/*
-			 * If the device is invalidated, rescan partition
-			 * if open succeeded or failed with -ENOMEDIUM.
-			 * The latter is necessary to prevent ghost
-			 * partitions on a removed medium.
-			 */
-			if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
-			    (!ret || ret == -ENOMEDIUM))
-				bdev_disk_changed(bdev, ret == -ENOMEDIUM);
-
-			if (ret)
-				return ret;
-		} else {
-			struct block_device *whole = bdgrab(disk->part0);
-
-			mutex_lock_nested(&whole->bd_mutex, 1);
-			ret = __blkdev_get(whole, mode);
-			if (ret) {
-				mutex_unlock(&whole->bd_mutex);
-				bdput(whole);
-				return ret;
-			}
-			whole->bd_part_count++;
-			mutex_unlock(&whole->bd_mutex);
-
-			if (!(disk->flags & GENHD_FL_UP) ||
-			    !bdev_nr_sectors(bdev)) {
-				__blkdev_put(whole, mode, 1);
-				bdput(whole);
-				return -ENXIO;
-			}
-			set_init_blocksize(bdev);
+	if (disk->fops->open) {
+		ret = disk->fops->open(bdev, mode);
+		if (ret) {
+			/* avoid ghost partitions on a removed medium */
+			if (ret == -ENOMEDIUM &&
+			     test_bit(GD_NEED_PART_SCAN, &disk->state))
+				bdev_disk_changed(bdev, true);
+			return ret;
 		}
+	}
 
+	if (!bdev->bd_openers) {
+		set_init_blocksize(bdev);
 		if (bdev->bd_bdi == &noop_backing_dev_info)
 			bdev->bd_bdi = bdi_get(disk->queue->backing_dev_info);
-	} else {
-		if (!bdev_is_partition(bdev)) {
-			if (bdev->bd_disk->fops->open)
-				ret = bdev->bd_disk->fops->open(bdev, mode);
-			/* the same as first opener case, read comment there */
-			if (test_bit(GD_NEED_PART_SCAN, &disk->state) &&
-			    (!ret || ret == -ENOMEDIUM))
-				bdev_disk_changed(bdev, ret == -ENOMEDIUM);
-			if (ret)
-				return ret;
-		}
 	}
+	if (test_bit(GD_NEED_PART_SCAN, &disk->state))
+		bdev_disk_changed(bdev, false);
 	bdev->bd_openers++;
+	return 0;;
+}
+
+static int blkdev_get_part(struct block_device *part, fmode_t mode)
+{
+	struct gendisk *disk = part->bd_disk;
+	struct block_device *whole;
+	int ret;
+
+	if (part->bd_openers)
+		goto done;
+
+	whole = bdgrab(disk->part0);
+	mutex_lock_nested(&whole->bd_mutex, 1);
+	ret = blkdev_get_whole(whole, mode);
+	if (ret) {
+		mutex_unlock(&whole->bd_mutex);
+		goto out_put_whole;
+	}
+	whole->bd_part_count++;
+	mutex_unlock(&whole->bd_mutex);
+
+	ret = -ENXIO;
+	if (!(disk->flags & GENHD_FL_UP) || !bdev_nr_sectors(part))
+		goto out_blkdev_put;
+
+	set_init_blocksize(part);
+	if (part->bd_bdi == &noop_backing_dev_info)
+		part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
+done:
+	part->bd_openers++;
 	return 0;
+
+out_blkdev_put:
+	__blkdev_put(whole, mode, 1);
+out_put_whole:
+	bdput(whole);
+	return ret;
 }
 
 struct block_device *blkdev_get_no_open(dev_t dev)
@@ -1450,7 +1442,10 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 	disk_block_events(disk);
 
 	mutex_lock(&bdev->bd_mutex);
-	ret =__blkdev_get(bdev, mode);
+	if (bdev_is_partition(bdev))
+		ret = blkdev_get_part(bdev, mode);
+	else
+		ret = blkdev_get_whole(bdev, mode);
 	if (ret)
 		goto abort_claiming;
 	if (mode & FMODE_EXCL) {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put
  2021-05-12  6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
@ 2021-05-12  6:18 ` Christoph Hellwig
  2021-05-19  9:32   ` Ming Lei
  2021-05-12  6:18 ` [PATCH 3/8] block: move bd_mutex to struct gendisk Christoph Hellwig
                   ` (5 subsequent siblings)
  7 siblings, 1 reply; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-12  6:18 UTC (permalink / raw)
  To: Jens Axboe, Song Liu
  Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

Do the early unlocked syncing even earlier to move more code out of
the recursive path.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/block_dev.c | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index d053510d2f6a..95fde785dae7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1553,16 +1553,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 	struct gendisk *disk = bdev->bd_disk;
 	struct block_device *victim = NULL;
 
-	/*
-	 * Sync early if it looks like we're the last one.  If someone else
-	 * opens the block device between now and the decrement of bd_openers
-	 * then we did a sync that we didn't need to, but that's not the end
-	 * of the world and we want to avoid long (could be several minute)
-	 * syncs while holding the mutex.
-	 */
-	if (bdev->bd_openers == 1)
-		sync_blockdev(bdev);
-
 	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (for_part)
 		bdev->bd_part_count--;
@@ -1589,6 +1579,16 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
 
+	/*
+	 * Sync early if it looks like we're the last one.  If someone else
+	 * opens the block device between now and the decrement of bd_openers
+	 * then we did a sync that we didn't need to, but that's not the end
+	 * of the world and we want to avoid long (could be several minute)
+	 * syncs while holding the mutex.
+	 */
+	if (bdev->bd_openers == 1)
+		sync_blockdev(bdev);
+
 	mutex_lock(&bdev->bd_mutex);
 
 	if (mode & FMODE_EXCL) {
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/8] block: move bd_mutex to struct gendisk
  2021-05-12  6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
@ 2021-05-12  6:18 ` Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 4/8] block: move adjusting bd_part_count out of __blkdev_get Christoph Hellwig
                   ` (4 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-12  6:18 UTC (permalink / raw)
  To: Jens Axboe, Song Liu
  Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

Replace the per-block device bd_mutex with a per-gendisk open_mutex,
thus simplifying locking wherever we deal with partitions.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 Documentation/filesystems/locking.rst |  2 +-
 block/genhd.c                         |  7 ++---
 block/partitions/core.c               | 24 ++++++++---------
 drivers/block/loop.c                  | 14 +++++-----
 drivers/block/xen-blkfront.c          |  8 +++---
 drivers/block/zram/zram_drv.c         | 18 ++++++-------
 drivers/block/zram/zram_drv.h         |  2 +-
 drivers/md/md.h                       |  6 ++---
 drivers/s390/block/dasd_genhd.c       |  8 +++---
 drivers/scsi/sd.c                     |  4 +--
 fs/block_dev.c                        | 37 +++++++++++----------------
 fs/btrfs/volumes.c                    |  2 +-
 fs/super.c                            |  8 +++---
 include/linux/blk_types.h             |  1 -
 include/linux/genhd.h                 |  3 +++
 15 files changed, 68 insertions(+), 76 deletions(-)

diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst
index 1e894480115b..2183fd8cc350 100644
--- a/Documentation/filesystems/locking.rst
+++ b/Documentation/filesystems/locking.rst
@@ -480,7 +480,7 @@ prototypes::
 locking rules:
 
 ======================= ===================
-ops			bd_mutex
+ops			open_mutex
 ======================= ===================
 open:			yes
 release:		yes
diff --git a/block/genhd.c b/block/genhd.c
index 39ca97b0edc6..8b88e99f6675 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -615,9 +615,9 @@ void del_gendisk(struct gendisk *disk)
 	 */
 	down_write(&bdev_lookup_sem);
 
-	mutex_lock(&disk->part0->bd_mutex);
+	mutex_lock(&disk->open_mutex);
 	blk_drop_partitions(disk);
-	mutex_unlock(&disk->part0->bd_mutex);
+	mutex_unlock(&disk->open_mutex);
 
 	fsync_bdev(disk->part0);
 	__invalidate_device(disk->part0, true);
@@ -1304,6 +1304,7 @@ struct gendisk *__alloc_disk_node(int minors, int node_id)
 		goto out_free_disk;
 
 	disk->node_id = node_id;
+	mutex_init(&disk->open_mutex);
 	xa_init(&disk->part_tbl);
 	if (xa_insert(&disk->part_tbl, 0, disk->part0, GFP_KERNEL))
 		goto out_destroy_part_tbl;
@@ -1521,7 +1522,7 @@ void disk_unblock_events(struct gendisk *disk)
  * doesn't clear the events from @disk->ev.
  *
  * CONTEXT:
- * If @mask is non-zero must be called with bdev->bd_mutex held.
+ * If @mask is non-zero must be called with disk->open_mutex held.
  */
 void disk_flush_events(struct gendisk *disk, unsigned int mask)
 {
diff --git a/block/partitions/core.c b/block/partitions/core.c
index dc60ecf46fe6..0d33f55a7d78 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -282,7 +282,7 @@ struct device_type part_type = {
 };
 
 /*
- * Must be called either with bd_mutex held, before a disk can be opened or
+ * Must be called either with open_mutex held, before a disk can be opened or
  * after all disk users are gone.
  */
 static void delete_partition(struct block_device *part)
@@ -311,7 +311,7 @@ static ssize_t whole_disk_show(struct device *dev,
 static DEVICE_ATTR(whole_disk, 0444, whole_disk_show, NULL);
 
 /*
- * Must be called either with bd_mutex held, before a disk can be opened or
+ * Must be called either with open_mutex held, before a disk can be opened or
  * after all disk users are gone.
  */
 static struct block_device *add_partition(struct gendisk *disk, int partno,
@@ -450,15 +450,15 @@ int bdev_add_partition(struct block_device *bdev, int partno,
 {
 	struct block_device *part;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	if (partition_overlaps(bdev->bd_disk, start, length, -1)) {
-		mutex_unlock(&bdev->bd_mutex);
+		mutex_unlock(&bdev->bd_disk->open_mutex);
 		return -EBUSY;
 	}
 
 	part = add_partition(bdev->bd_disk, partno, start, length,
 			ADDPART_FLAG_NONE, NULL);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	return PTR_ERR_OR_ZERO(part);
 }
 
@@ -471,8 +471,7 @@ int bdev_del_partition(struct block_device *bdev, int partno)
 	if (!part)
 		return -ENXIO;
 
-	mutex_lock(&part->bd_mutex);
-	mutex_lock_nested(&bdev->bd_mutex, 1);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 
 	ret = -EBUSY;
 	if (part->bd_openers)
@@ -481,8 +480,7 @@ int bdev_del_partition(struct block_device *bdev, int partno)
 	delete_partition(part);
 	ret = 0;
 out_unlock:
-	mutex_unlock(&bdev->bd_mutex);
-	mutex_unlock(&part->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	bdput(part);
 	return ret;
 }
@@ -497,8 +495,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
 	if (!part)
 		return -ENXIO;
 
-	mutex_lock(&part->bd_mutex);
-	mutex_lock_nested(&bdev->bd_mutex, 1);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	ret = -EINVAL;
 	if (start != part->bd_start_sect)
 		goto out_unlock;
@@ -511,8 +508,7 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
 
 	ret = 0;
 out_unlock:
-	mutex_unlock(&part->bd_mutex);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	bdput(part);
 	return ret;
 }
@@ -538,7 +534,7 @@ void blk_drop_partitions(struct gendisk *disk)
 	struct block_device *part;
 	unsigned long idx;
 
-	lockdep_assert_held(&disk->part0->bd_mutex);
+	lockdep_assert_held(&disk->open_mutex);
 
 	xa_for_each_start(&disk->part_tbl, idx, part, 1) {
 		if (!bdgrab(part))
diff --git a/drivers/block/loop.c b/drivers/block/loop.c
index d58d68f3c7cd..95c570f5923f 100644
--- a/drivers/block/loop.c
+++ b/drivers/block/loop.c
@@ -652,9 +652,9 @@ static void loop_reread_partitions(struct loop_device *lo,
 {
 	int rc;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	rc = bdev_disk_changed(bdev, false);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	if (rc)
 		pr_warn("%s: partition scan of loop%d (%s) failed (rc=%d)\n",
 			__func__, lo->lo_number, lo->lo_file_name, rc);
@@ -747,7 +747,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev,
 	mutex_unlock(&lo->lo_mutex);
 	/*
 	 * We must drop file reference outside of lo_mutex as dropping
-	 * the file ref can take bd_mutex which creates circular locking
+	 * the file ref can take open_mutex which creates circular locking
 	 * dependency.
 	 */
 	fput(old_file);
@@ -1260,7 +1260,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 	mutex_unlock(&lo->lo_mutex);
 	if (partscan) {
 		/*
-		 * bd_mutex has been held already in release path, so don't
+		 * open_mutex has been held already in release path, so don't
 		 * acquire it if this function is called in such case.
 		 *
 		 * If the reread partition isn't from release path, lo_refcnt
@@ -1268,10 +1268,10 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 		 * current holder is released.
 		 */
 		if (!release)
-			mutex_lock(&bdev->bd_mutex);
+			mutex_lock(&bdev->bd_disk->open_mutex);
 		err = bdev_disk_changed(bdev, false);
 		if (!release)
-			mutex_unlock(&bdev->bd_mutex);
+			mutex_unlock(&bdev->bd_disk->open_mutex);
 		if (err)
 			pr_warn("%s: partition scan of loop%d failed (rc=%d)\n",
 				__func__, lo_number, err);
@@ -1298,7 +1298,7 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
 	/*
 	 * Need not hold lo_mutex to fput backing file. Calling fput holding
 	 * lo_mutex triggers a circular lock dependency possibility warning as
-	 * fput can take bd_mutex which is usually taken before lo_mutex.
+	 * fput can take open_mutex which is usually taken before lo_mutex.
 	 */
 	if (filp)
 		fput(filp);
diff --git a/drivers/block/xen-blkfront.c b/drivers/block/xen-blkfront.c
index 10df39a8b18d..f2c1aedcdf5a 100644
--- a/drivers/block/xen-blkfront.c
+++ b/drivers/block/xen-blkfront.c
@@ -2163,7 +2163,7 @@ static void blkfront_closing(struct blkfront_info *info)
 		return;
 	}
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 
 	if (bdev->bd_openers) {
 		xenbus_dev_error(xbdev, -EBUSY,
@@ -2174,7 +2174,7 @@ static void blkfront_closing(struct blkfront_info *info)
 		xenbus_frontend_closed(xbdev);
 	}
 
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	bdput(bdev);
 }
 
@@ -2531,7 +2531,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
 	 * isn't closed yet, we let release take care of it.
 	 */
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&disk->open_mutex);
 	info = disk->private_data;
 
 	dev_warn(disk_to_dev(disk),
@@ -2546,7 +2546,7 @@ static int blkfront_remove(struct xenbus_device *xbdev)
 		mutex_unlock(&blkfront_mutex);
 	}
 
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&disk->open_mutex);
 	bdput(bdev);
 
 	return 0;
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index cf8deecc39ef..b3bf544493d3 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -1781,24 +1781,24 @@ static ssize_t reset_store(struct device *dev,
 	zram = dev_to_zram(dev);
 	bdev = zram->disk->part0;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	/* Do not reset an active device or claimed device */
 	if (bdev->bd_openers || zram->claim) {
-		mutex_unlock(&bdev->bd_mutex);
+		mutex_unlock(&bdev->bd_disk->open_mutex);
 		return -EBUSY;
 	}
 
 	/* From now on, anyone can't open /dev/zram[0-9] */
 	zram->claim = true;
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 
 	/* Make sure all the pending I/O are finished */
 	fsync_bdev(bdev);
 	zram_reset_device(zram);
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	zram->claim = false;
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 
 	return len;
 }
@@ -1808,7 +1808,7 @@ static int zram_open(struct block_device *bdev, fmode_t mode)
 	int ret = 0;
 	struct zram *zram;
 
-	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));
+	WARN_ON(!mutex_is_locked(&bdev->bd_disk->open_mutex));
 
 	zram = bdev->bd_disk->private_data;
 	/* zram was claimed to reset so open request fails */
@@ -1982,14 +1982,14 @@ static int zram_remove(struct zram *zram)
 {
 	struct block_device *bdev = zram->disk->part0;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	if (bdev->bd_openers || zram->claim) {
-		mutex_unlock(&bdev->bd_mutex);
+		mutex_unlock(&bdev->bd_disk->open_mutex);
 		return -EBUSY;
 	}
 
 	zram->claim = true;
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 
 	zram_debugfs_unregister(zram);
 
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index 419a7e8281ee..74c411911b6e 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -112,7 +112,7 @@ struct zram {
 	/*
 	 * zram is claimed so open request will be failed
 	 */
-	bool claim; /* Protected by bdev->bd_mutex */
+	bool claim; /* Protected by disk->open_mutex */
 	struct file *backing_dev;
 #ifdef CONFIG_ZRAM_WRITEBACK
 	spinlock_t wb_limit_lock;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index fb7eab58cfd5..a88086d4110c 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -395,10 +395,10 @@ struct mddev {
 	 * that we are never stopping an array while it is open.
 	 * 'reconfig_mutex' protects all other reconfiguration.
 	 * These locks are separate due to conflicting interactions
-	 * with bdev->bd_mutex.
+	 * with disk->open_mutex.
 	 * Lock ordering is:
-	 *  reconfig_mutex -> bd_mutex
-	 *  bd_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
+	 *  reconfig_mutex -> disk->open_mutex
+	 *  disk->open_mutex -> open_mutex:  e.g. __blkdev_get -> md_open
 	 */
 	struct mutex			open_mutex;
 	struct mutex			reconfig_mutex;
diff --git a/drivers/s390/block/dasd_genhd.c b/drivers/s390/block/dasd_genhd.c
index 8d6587ec73e2..bf2082d461c7 100644
--- a/drivers/s390/block/dasd_genhd.c
+++ b/drivers/s390/block/dasd_genhd.c
@@ -109,9 +109,9 @@ int dasd_scan_partitions(struct dasd_block *block)
 		return -ENODEV;
 	}
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&block->gdp->open_mutex);
 	rc = bdev_disk_changed(bdev, false);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&block->gdp->open_mutex);
 	if (rc)
 		DBF_DEV_EVENT(DBF_ERR, block->base,
 				"scan partitions error, rc %d", rc);
@@ -145,9 +145,9 @@ void dasd_destroy_partitions(struct dasd_block *block)
 	bdev = block->bdev;
 	block->bdev = NULL;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 	bdev_disk_changed(bdev, true);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 
 	/* Matching blkdev_put to the blkdev_get in dasd_scan_partitions. */
 	blkdev_put(bdev, FMODE_READ);
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index cb3c37d1e009..d3ff723af879 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1400,7 +1400,7 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
  *	In the latter case @inode and @filp carry an abridged amount
  *	of information as noted above.
  *
- *	Locking: called with bdev->bd_mutex held.
+ *	Locking: called with bdev->bd_disk->open_mutex held.
  **/
 static int sd_open(struct block_device *bdev, fmode_t mode)
 {
@@ -1476,7 +1476,7 @@ static int sd_open(struct block_device *bdev, fmode_t mode)
  *	Note: may block (uninterruptible) if error recovery is underway
  *	on this disk.
  *
- *	Locking: called with bdev->bd_mutex held.
+ *	Locking: called with bdev->bd_disk->open_mutex held.
  **/
 static void sd_release(struct gendisk *disk, fmode_t mode)
 {
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 95fde785dae7..3597532cc081 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -895,7 +895,6 @@ struct block_device *bdev_alloc(struct gendisk *disk, u8 partno)
 	mapping_set_gfp_mask(&inode->i_data, GFP_USER);
 
 	bdev = I_BDEV(inode);
-	mutex_init(&bdev->bd_mutex);
 	mutex_init(&bdev->bd_fsfreeze_mutex);
 	spin_lock_init(&bdev->bd_size_lock);
 	bdev->bd_disk = disk;
@@ -1154,7 +1153,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 	struct bd_holder_disk *holder;
 	int ret = 0;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 
 	WARN_ON_ONCE(!bdev->bd_holder);
 
@@ -1199,7 +1198,7 @@ int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk)
 out_free:
 	kfree(holder);
 out_unlock:
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(bd_link_disk_holder);
@@ -1218,7 +1217,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 {
 	struct bd_holder_disk *holder;
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&bdev->bd_disk->open_mutex);
 
 	holder = bd_find_holder_disk(bdev, disk);
 
@@ -1230,7 +1229,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 		kfree(holder);
 	}
 
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&bdev->bd_disk->open_mutex);
 }
 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 #endif
@@ -1242,7 +1241,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 	struct gendisk *disk = bdev->bd_disk;
 	int ret = 0;
 
-	lockdep_assert_held(&bdev->bd_mutex);
+	lockdep_assert_held(&disk->open_mutex);
 
 rescan:
 	if (bdev->bd_part_count)
@@ -1324,14 +1323,10 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
 		goto done;
 
 	whole = bdgrab(disk->part0);
-	mutex_lock_nested(&whole->bd_mutex, 1);
 	ret = blkdev_get_whole(whole, mode);
-	if (ret) {
-		mutex_unlock(&whole->bd_mutex);
+	if (ret)
 		goto out_put_whole;
-	}
 	whole->bd_part_count++;
-	mutex_unlock(&whole->bd_mutex);
 
 	ret = -ENXIO;
 	if (!(disk->flags & GENHD_FL_UP) || !bdev_nr_sectors(part))
@@ -1441,7 +1436,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 
 	disk_block_events(disk);
 
-	mutex_lock(&bdev->bd_mutex);
+	mutex_lock(&disk->open_mutex);
 	if (bdev_is_partition(bdev))
 		ret = blkdev_get_part(bdev, mode);
 	else
@@ -1464,7 +1459,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 			unblock_events = false;
 		}
 	}
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&disk->open_mutex);
 
 	if (unblock_events)
 		disk_unblock_events(disk);
@@ -1473,7 +1468,7 @@ struct block_device *blkdev_get_by_dev(dev_t dev, fmode_t mode, void *holder)
 abort_claiming:
 	if (mode & FMODE_EXCL)
 		bd_abort_claiming(bdev, holder);
-	mutex_unlock(&bdev->bd_mutex);
+	mutex_unlock(&disk->open_mutex);
 	disk_unblock_events(disk);
 put_blkdev:
 	blkdev_put_no_open(bdev);
@@ -1553,7 +1548,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 	struct gendisk *disk = bdev->bd_disk;
 	struct block_device *victim = NULL;
 
-	mutex_lock_nested(&bdev->bd_mutex, for_part);
 	if (for_part)
 		bdev->bd_part_count--;
 
@@ -1568,7 +1562,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 
 	if (!bdev_is_partition(bdev) && disk->fops->release)
 		disk->fops->release(disk, mode);
-	mutex_unlock(&bdev->bd_mutex);
 	if (victim) {
 		__blkdev_put(victim, mode, 1);
 		bdput(victim);
@@ -1589,15 +1582,14 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	if (bdev->bd_openers == 1)
 		sync_blockdev(bdev);
 
-	mutex_lock(&bdev->bd_mutex);
-
+	mutex_lock(&disk->open_mutex);
 	if (mode & FMODE_EXCL) {
 		struct block_device *whole = bdev_whole(bdev);
 		bool bdev_free;
 
 		/*
 		 * Release a claim on the device.  The holder fields
-		 * are protected with bdev_lock.  bd_mutex is to
+		 * are protected with bdev_lock.  open_mutex is to
 		 * synchronize disk_holder unlinking.
 		 */
 		spin_lock(&bdev_lock);
@@ -1628,9 +1620,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	 * from userland - e.g. eject(1).
 	 */
 	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
-	mutex_unlock(&bdev->bd_mutex);
 
 	__blkdev_put(bdev, mode, 0);
+	mutex_unlock(&disk->open_mutex);
+
 	blkdev_put_no_open(bdev);
 }
 EXPORT_SYMBOL(blkdev_put);
@@ -1937,10 +1930,10 @@ void iterate_bdevs(void (*func)(struct block_device *, void *), void *arg)
 		old_inode = inode;
 		bdev = I_BDEV(inode);
 
-		mutex_lock(&bdev->bd_mutex);
+		mutex_lock(&bdev->bd_disk->open_mutex);
 		if (bdev->bd_openers)
 			func(bdev, arg);
-		mutex_unlock(&bdev->bd_mutex);
+		mutex_unlock(&bdev->bd_disk->open_mutex);
 
 		spin_lock(&blockdev_superblock->s_inode_list_lock);
 	}
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 9a1ead0c4a31..0f35bf7030ac 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1247,7 +1247,7 @@ int btrfs_open_devices(struct btrfs_fs_devices *fs_devices,
 	lockdep_assert_held(&uuid_mutex);
 	/*
 	 * The device_list_mutex cannot be taken here in case opening the
-	 * underlying device takes further locks like bd_mutex.
+	 * underlying device takes further locks like open_mutex.
 	 *
 	 * We also don't need the lock here as this is called during mount and
 	 * exclusion is provided by uuid_mutex
diff --git a/fs/super.c b/fs/super.c
index 11b7e7213fd1..91b7f156735b 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -1277,9 +1277,9 @@ int get_tree_bdev(struct fs_context *fc,
 		}
 
 		/*
-		 * s_umount nests inside bd_mutex during
+		 * s_umount nests inside open_mutex during
 		 * __invalidate_device().  blkdev_put() acquires
-		 * bd_mutex and can't be called under s_umount.  Drop
+		 * open_mutex and can't be called under s_umount.  Drop
 		 * s_umount temporarily.  This is safe as we're
 		 * holding an active reference.
 		 */
@@ -1352,9 +1352,9 @@ struct dentry *mount_bdev(struct file_system_type *fs_type,
 		}
 
 		/*
-		 * s_umount nests inside bd_mutex during
+		 * s_umount nests inside open_mutex during
 		 * __invalidate_device().  blkdev_put() acquires
-		 * bd_mutex and can't be called under s_umount.  Drop
+		 * open_mutex and can't be called under s_umount.  Drop
 		 * s_umount temporarily.  This is safe as we're
 		 * holding an active reference.
 		 */
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index db026b6ec15a..a09660671fa4 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -29,7 +29,6 @@ struct block_device {
 	int			bd_openers;
 	struct inode *		bd_inode;	/* will die */
 	struct super_block *	bd_super;
-	struct mutex		bd_mutex;	/* open/close mutex */
 	void *			bd_claiming;
 	struct device		bd_device;
 	void *			bd_holder;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 7e9660ea967d..0747f1853f39 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -153,6 +153,9 @@ struct gendisk {
 	unsigned long state;
 #define GD_NEED_PART_SCAN		0
 #define GD_READ_ONLY			1
+
+	struct mutex open_mutex;	/* open/close mutex */
+
 	struct kobject *slave_dir;
 
 	struct timer_rand_state *random;
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/8] block: move adjusting bd_part_count out of __blkdev_get
  2021-05-12  6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
                   ` (2 preceding siblings ...)
  2021-05-12  6:18 ` [PATCH 3/8] block: move bd_mutex to struct gendisk Christoph Hellwig
@ 2021-05-12  6:18 ` Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 5/8] block: split __blkdev_put Christoph Hellwig
                   ` (3 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-12  6:18 UTC (permalink / raw)
  To: Jens Axboe, Song Liu
  Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

Keep in the callers and thus remove the for_part argument.  This mirrors
what is done on the blkdev_get side and slightly simplifies
blkdev_get_part as well.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/block_dev.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 3597532cc081..59cdcdb97e3d 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1234,7 +1234,7 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 #endif
 
-static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part);
+static void __blkdev_put(struct block_device *bdev, fmode_t mode);
 
 int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 {
@@ -1326,12 +1326,12 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
 	ret = blkdev_get_whole(whole, mode);
 	if (ret)
 		goto out_put_whole;
-	whole->bd_part_count++;
 
 	ret = -ENXIO;
 	if (!(disk->flags & GENHD_FL_UP) || !bdev_nr_sectors(part))
 		goto out_blkdev_put;
 
+	whole->bd_part_count++;
 	set_init_blocksize(part);
 	if (part->bd_bdi == &noop_backing_dev_info)
 		part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
@@ -1340,7 +1340,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
 	return 0;
 
 out_blkdev_put:
-	__blkdev_put(whole, mode, 1);
+	__blkdev_put(whole, mode);
 out_put_whole:
 	bdput(whole);
 	return ret;
@@ -1543,14 +1543,11 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
+static void __blkdev_put(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
 	struct block_device *victim = NULL;
 
-	if (for_part)
-		bdev->bd_part_count--;
-
 	if (!--bdev->bd_openers) {
 		WARN_ON_ONCE(bdev->bd_holders);
 		sync_blockdev(bdev);
@@ -1563,7 +1560,8 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
 	if (!bdev_is_partition(bdev) && disk->fops->release)
 		disk->fops->release(disk, mode);
 	if (victim) {
-		__blkdev_put(victim, mode, 1);
+		victim->bd_part_count--;
+		__blkdev_put(victim, mode);
 		bdput(victim);
 	}
 }
@@ -1621,7 +1619,7 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	 */
 	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
 
-	__blkdev_put(bdev, mode, 0);
+	__blkdev_put(bdev, mode);
 	mutex_unlock(&disk->open_mutex);
 
 	blkdev_put_no_open(bdev);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 5/8] block: split __blkdev_put
  2021-05-12  6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
                   ` (3 preceding siblings ...)
  2021-05-12  6:18 ` [PATCH 4/8] block: move adjusting bd_part_count out of __blkdev_get Christoph Hellwig
@ 2021-05-12  6:18 ` Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 6/8] block: move bd_part_count to struct gendisk Christoph Hellwig
                   ` (2 subsequent siblings)
  7 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-12  6:18 UTC (permalink / raw)
  To: Jens Axboe, Song Liu
  Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

Split __blkdev_put into one helper for the whole device, and one for
partitions as well as another shared helper for flushing the block
device inode mapping.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 fs/block_dev.c | 58 ++++++++++++++++++++++++++++----------------------
 1 file changed, 32 insertions(+), 26 deletions(-)

diff --git a/fs/block_dev.c b/fs/block_dev.c
index 59cdcdb97e3d..4bcab845ac05 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1234,7 +1234,13 @@ void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk)
 EXPORT_SYMBOL_GPL(bd_unlink_disk_holder);
 #endif
 
-static void __blkdev_put(struct block_device *bdev, fmode_t mode);
+static void blkdev_flush_mapping(struct block_device *bdev)
+{
+	WARN_ON_ONCE(bdev->bd_holders);
+	sync_blockdev(bdev);
+	kill_bdev(bdev);
+	bdev_write_inode(bdev);
+}
 
 int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 {
@@ -1313,6 +1319,14 @@ static int blkdev_get_whole(struct block_device *bdev, fmode_t mode)
 	return 0;;
 }
 
+static void blkdev_put_whole(struct block_device *bdev, fmode_t mode)
+{
+	if (!--bdev->bd_openers)
+		blkdev_flush_mapping(bdev);
+	if (bdev->bd_disk->fops->release)
+		bdev->bd_disk->fops->release(bdev->bd_disk, mode);
+}
+
 static int blkdev_get_part(struct block_device *part, fmode_t mode)
 {
 	struct gendisk *disk = part->bd_disk;
@@ -1340,12 +1354,24 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
 	return 0;
 
 out_blkdev_put:
-	__blkdev_put(whole, mode);
+	blkdev_put_whole(whole, mode);
 out_put_whole:
 	bdput(whole);
 	return ret;
 }
 
+static void blkdev_put_part(struct block_device *part, fmode_t mode)
+{
+	struct block_device *whole = bdev_whole(part);
+
+	if (--part->bd_openers)
+		return;
+	blkdev_flush_mapping(part);
+	whole->bd_part_count--;
+	blkdev_put_whole(whole, mode);
+	bdput(whole);
+}
+
 struct block_device *blkdev_get_no_open(dev_t dev)
 {
 	struct block_device *bdev;
@@ -1543,29 +1569,6 @@ static int blkdev_open(struct inode * inode, struct file * filp)
 	return 0;
 }
 
-static void __blkdev_put(struct block_device *bdev, fmode_t mode)
-{
-	struct gendisk *disk = bdev->bd_disk;
-	struct block_device *victim = NULL;
-
-	if (!--bdev->bd_openers) {
-		WARN_ON_ONCE(bdev->bd_holders);
-		sync_blockdev(bdev);
-		kill_bdev(bdev);
-		bdev_write_inode(bdev);
-		if (bdev_is_partition(bdev))
-			victim = bdev_whole(bdev);
-	}
-
-	if (!bdev_is_partition(bdev) && disk->fops->release)
-		disk->fops->release(disk, mode);
-	if (victim) {
-		victim->bd_part_count--;
-		__blkdev_put(victim, mode);
-		bdput(victim);
-	}
-}
-
 void blkdev_put(struct block_device *bdev, fmode_t mode)
 {
 	struct gendisk *disk = bdev->bd_disk;
@@ -1619,7 +1622,10 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
 	 */
 	disk_flush_events(disk, DISK_EVENT_MEDIA_CHANGE);
 
-	__blkdev_put(bdev, mode);
+	if (bdev_is_partition(bdev))
+		blkdev_put_part(bdev, mode);
+	else
+		blkdev_put_whole(bdev, mode);
 	mutex_unlock(&disk->open_mutex);
 
 	blkdev_put_no_open(bdev);
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 6/8] block: move bd_part_count to struct gendisk
  2021-05-12  6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
                   ` (4 preceding siblings ...)
  2021-05-12  6:18 ` [PATCH 5/8] block: split __blkdev_put Christoph Hellwig
@ 2021-05-12  6:18 ` Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 7/8] block: factor out a part_devt helper Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 8/8] block: remove bdget_disk Christoph Hellwig
  7 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-12  6:18 UTC (permalink / raw)
  To: Jens Axboe, Song Liu
  Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

The bd_part_count value only makes sense for whole devices, so move it
to struct gendisk and give it a more descriptive name.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/ioctl.c             | 2 +-
 fs/block_dev.c            | 6 +++---
 include/linux/blk_types.h | 3 ---
 include/linux/genhd.h     | 1 +
 4 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/block/ioctl.c b/block/ioctl.c
index 8ba1ed8defd0..24beec9ca9c9 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -89,7 +89,7 @@ static int blkdev_reread_part(struct block_device *bdev, fmode_t mode)
 		return -EINVAL;
 	if (!capable(CAP_SYS_ADMIN))
 		return -EACCES;
-	if (bdev->bd_part_count)
+	if (bdev->bd_disk->open_partitions)
 		return -EBUSY;
 
 	/*
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4bcab845ac05..8dd8e2fd1401 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1250,7 +1250,7 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 	lockdep_assert_held(&disk->open_mutex);
 
 rescan:
-	if (bdev->bd_part_count)
+	if (disk->open_partitions)
 		return -EBUSY;
 	sync_blockdev(bdev);
 	invalidate_bdev(bdev);
@@ -1345,7 +1345,7 @@ static int blkdev_get_part(struct block_device *part, fmode_t mode)
 	if (!(disk->flags & GENHD_FL_UP) || !bdev_nr_sectors(part))
 		goto out_blkdev_put;
 
-	whole->bd_part_count++;
+	disk->open_partitions++;
 	set_init_blocksize(part);
 	if (part->bd_bdi == &noop_backing_dev_info)
 		part->bd_bdi = bdi_get(disk->queue->backing_dev_info);
@@ -1367,7 +1367,7 @@ static void blkdev_put_part(struct block_device *part, fmode_t mode)
 	if (--part->bd_openers)
 		return;
 	blkdev_flush_mapping(part);
-	whole->bd_part_count--;
+	whole->bd_disk->open_partitions--;
 	blkdev_put_whole(whole, mode);
 	bdput(whole);
 }
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index a09660671fa4..fd3860d18d7e 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -39,9 +39,6 @@ struct block_device {
 #endif
 	struct kobject		*bd_holder_dir;
 	u8			bd_partno;
-	/* number of times partitions within this device have been opened. */
-	unsigned		bd_part_count;
-
 	spinlock_t		bd_size_lock; /* for bd_inode->i_size updates */
 	struct gendisk *	bd_disk;
 	struct backing_dev_info *bd_bdi;
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 0747f1853f39..74fd28ddac70 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -155,6 +155,7 @@ struct gendisk {
 #define GD_READ_ONLY			1
 
 	struct mutex open_mutex;	/* open/close mutex */
+	unsigned open_partitions;	/* number of open partitions */
 
 	struct kobject *slave_dir;
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 7/8] block: factor out a part_devt helper
  2021-05-12  6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
                   ` (5 preceding siblings ...)
  2021-05-12  6:18 ` [PATCH 6/8] block: move bd_part_count to struct gendisk Christoph Hellwig
@ 2021-05-12  6:18 ` Christoph Hellwig
  2021-05-12  6:18 ` [PATCH 8/8] block: remove bdget_disk Christoph Hellwig
  7 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-12  6:18 UTC (permalink / raw)
  To: Jens Axboe, Song Liu
  Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

Add a helper to find the dev_t for a disk + partno tuple.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/genhd.c         | 25 +++++++++++++++++--------
 include/linux/genhd.h |  1 +
 init/do_mounts.c      | 10 ++--------
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 8b88e99f6675..14fd777811fe 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1251,6 +1251,19 @@ static int __init proc_genhd_init(void)
 module_init(proc_genhd_init);
 #endif /* CONFIG_PROC_FS */
 
+dev_t part_devt(struct gendisk *disk, u8 partno)
+{
+	struct block_device *part = bdget_disk(disk, partno);
+	dev_t devt = 0;
+
+	if (part) {
+		devt = part->bd_dev;
+		bdput(part);
+	}
+
+	return devt;
+}
+
 dev_t blk_lookup_devt(const char *name, int partno)
 {
 	dev_t devt = MKDEV(0, 0);
@@ -1260,7 +1273,6 @@ dev_t blk_lookup_devt(const char *name, int partno)
 	class_dev_iter_init(&iter, &block_class, NULL, &disk_type);
 	while ((dev = class_dev_iter_next(&iter))) {
 		struct gendisk *disk = dev_to_disk(dev);
-		struct block_device *part;
 
 		if (strcmp(dev_name(dev), name))
 			continue;
@@ -1271,13 +1283,10 @@ dev_t blk_lookup_devt(const char *name, int partno)
 			 */
 			devt = MKDEV(MAJOR(dev->devt),
 				     MINOR(dev->devt) + partno);
-			break;
-		}
-		part = bdget_disk(disk, partno);
-		if (part) {
-			devt = part->bd_dev;
-			bdput(part);
-			break;
+		} else {
+			devt = part_devt(disk, partno);
+			if (devt)
+				break;
 		}
 	}
 	class_dev_iter_exit(&iter);
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 74fd28ddac70..4c4d903caa09 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -312,6 +312,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev,
 
 extern struct rw_semaphore bdev_lookup_sem;
 
+dev_t part_devt(struct gendisk *disk, u8 partno);
 dev_t blk_lookup_devt(const char *name, int partno);
 void blk_request_module(dev_t devt);
 #ifdef CONFIG_BLOCK
diff --git a/init/do_mounts.c b/init/do_mounts.c
index a78e44ee6adb..74aede860de7 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -133,14 +133,8 @@ static dev_t devt_from_partuuid(const char *uuid_str)
 		 * Attempt to find the requested partition by adding an offset
 		 * to the partition number found by UUID.
 		 */
-		struct block_device *part;
-
-		part = bdget_disk(dev_to_disk(dev),
-				  dev_to_bdev(dev)->bd_partno + offset);
-		if (part) {
-			devt = part->bd_dev;
-			bdput(part);
-		}
+		devt = part_devt(dev_to_disk(dev),
+				 dev_to_bdev(dev)->bd_partno + offset);
 	} else {
 		devt = dev->devt;
 	}
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 8/8] block: remove bdget_disk
  2021-05-12  6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
                   ` (6 preceding siblings ...)
  2021-05-12  6:18 ` [PATCH 7/8] block: factor out a part_devt helper Christoph Hellwig
@ 2021-05-12  6:18 ` Christoph Hellwig
  7 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-12  6:18 UTC (permalink / raw)
  To: Jens Axboe, Song Liu
  Cc: Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

Just opencode the xa_load in the callers, as none of them actually
needs a reference to the bdev.

Signed-off-by: Christoph Hellwig <hch@lst.de>
---
 block/genhd.c           | 35 +++++------------------------------
 block/partitions/core.c | 25 ++++++++++++-------------
 include/linux/genhd.h   |  1 -
 3 files changed, 17 insertions(+), 44 deletions(-)

diff --git a/block/genhd.c b/block/genhd.c
index 14fd777811fe..a5847560719c 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -701,32 +701,6 @@ void blk_request_module(dev_t devt)
 		request_module("block-major-%d", MAJOR(devt));
 }
 
-/**
- * bdget_disk - do bdget() by gendisk and partition number
- * @disk: gendisk of interest
- * @partno: partition number
- *
- * Find partition @partno from @disk, do bdget() on it.
- *
- * CONTEXT:
- * Don't care.
- *
- * RETURNS:
- * Resulting block_device on success, NULL on failure.
- */
-struct block_device *bdget_disk(struct gendisk *disk, int partno)
-{
-	struct block_device *bdev = NULL;
-
-	rcu_read_lock();
-	bdev = xa_load(&disk->part_tbl, partno);
-	if (bdev && !bdgrab(bdev))
-		bdev = NULL;
-	rcu_read_unlock();
-
-	return bdev;
-}
-
 /*
  * print a full list of all partitions - intended for places where the root
  * filesystem can't be mounted and thus to give the victim some idea of what
@@ -1253,13 +1227,14 @@ module_init(proc_genhd_init);
 
 dev_t part_devt(struct gendisk *disk, u8 partno)
 {
-	struct block_device *part = bdget_disk(disk, partno);
+	struct block_device *part;
 	dev_t devt = 0;
 
-	if (part) {
+	rcu_read_lock();
+	part = xa_load(&disk->part_tbl, partno);
+	if (part)
 		devt = part->bd_dev;
-		bdput(part);
-	}
+	rcu_read_unlock();
 
 	return devt;
 }
diff --git a/block/partitions/core.c b/block/partitions/core.c
index 0d33f55a7d78..325368b9de29 100644
--- a/block/partitions/core.c
+++ b/block/partitions/core.c
@@ -325,6 +325,8 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
 	const char *dname;
 	int err;
 
+	lockdep_assert_held(&disk->open_mutex);
+
 	/*
 	 * disk_max_parts() won't be zero, either GENHD_FL_EXT_DEVT is set
 	 * or 'minors' is passed to alloc_disk().
@@ -464,14 +466,13 @@ int bdev_add_partition(struct block_device *bdev, int partno,
 
 int bdev_del_partition(struct block_device *bdev, int partno)
 {
-	struct block_device *part;
-	int ret;
-
-	part = bdget_disk(bdev->bd_disk, partno);
-	if (!part)
-		return -ENXIO;
+	struct block_device *part = NULL;
+	int ret = -ENXIO;
 
 	mutex_lock(&bdev->bd_disk->open_mutex);
+	part = xa_load(&bdev->bd_disk->part_tbl, partno);
+	if (!part)
+		goto out_unlock;
 
 	ret = -EBUSY;
 	if (part->bd_openers)
@@ -481,21 +482,20 @@ int bdev_del_partition(struct block_device *bdev, int partno)
 	ret = 0;
 out_unlock:
 	mutex_unlock(&bdev->bd_disk->open_mutex);
-	bdput(part);
 	return ret;
 }
 
 int bdev_resize_partition(struct block_device *bdev, int partno,
 		sector_t start, sector_t length)
 {
-	struct block_device *part;
-	int ret = 0;
+	struct block_device *part = NULL;
+	int ret = -ENXIO;
 
-	part = bdget_disk(bdev->bd_disk, partno);
+	mutex_lock(&bdev->bd_disk->open_mutex);
+	part = xa_load(&bdev->bd_disk->part_tbl, partno);
 	if (!part)
-		return -ENXIO;
+		goto out_unlock;
 
-	mutex_lock(&bdev->bd_disk->open_mutex);
 	ret = -EINVAL;
 	if (start != part->bd_start_sect)
 		goto out_unlock;
@@ -509,7 +509,6 @@ int bdev_resize_partition(struct block_device *bdev, int partno,
 	ret = 0;
 out_unlock:
 	mutex_unlock(&bdev->bd_disk->open_mutex);
-	bdput(part);
 	return ret;
 }
 
diff --git a/include/linux/genhd.h b/include/linux/genhd.h
index 4c4d903caa09..5043e5d9436a 100644
--- a/include/linux/genhd.h
+++ b/include/linux/genhd.h
@@ -222,7 +222,6 @@ static inline void add_disk_no_queue_reg(struct gendisk *disk)
 }
 
 extern void del_gendisk(struct gendisk *gp);
-extern struct block_device *bdget_disk(struct gendisk *disk, int partno);
 
 void set_disk_ro(struct gendisk *disk, bool read_only);
 
-- 
2.30.2


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/8] block: split __blkdev_get
  2021-05-12  6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
@ 2021-05-19  9:21   ` Ming Lei
  2021-05-19 13:23     ` Christoph Hellwig
  0 siblings, 1 reply; 12+ messages in thread
From: Ming Lei @ 2021-05-19  9:21 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Song Liu, Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

On Wed, May 12, 2021 at 08:18:49AM +0200, Christoph Hellwig wrote:
> Split __blkdev_get into one helper for the whole device, and one for
> opening partitions.  This removes the (bounded) recursion when opening
> a partition.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>

Nice cleanup, now the blkdev get code becomes more readable than before:

Reviewed-by: Ming Lei <ming.lei@redhat.com>

-- 
Ming


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put
  2021-05-12  6:18 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
@ 2021-05-19  9:32   ` Ming Lei
  0 siblings, 0 replies; 12+ messages in thread
From: Ming Lei @ 2021-05-19  9:32 UTC (permalink / raw)
  To: Christoph Hellwig
  Cc: Jens Axboe, Song Liu, Konrad Rzeszutek Wilk, Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

On Wed, May 12, 2021 at 08:18:50AM +0200, Christoph Hellwig wrote:
> Do the early unlocked syncing even earlier to move more code out of
> the recursive path.
> 
> Signed-off-by: Christoph Hellwig <hch@lst.de>
> ---
>  fs/block_dev.c | 20 ++++++++++----------
>  1 file changed, 10 insertions(+), 10 deletions(-)
> 
> diff --git a/fs/block_dev.c b/fs/block_dev.c
> index d053510d2f6a..95fde785dae7 100644
> --- a/fs/block_dev.c
> +++ b/fs/block_dev.c
> @@ -1553,16 +1553,6 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part)
>  	struct gendisk *disk = bdev->bd_disk;
>  	struct block_device *victim = NULL;
>  
> -	/*
> -	 * Sync early if it looks like we're the last one.  If someone else
> -	 * opens the block device between now and the decrement of bd_openers
> -	 * then we did a sync that we didn't need to, but that's not the end
> -	 * of the world and we want to avoid long (could be several minute)
> -	 * syncs while holding the mutex.
> -	 */
> -	if (bdev->bd_openers == 1)
> -		sync_blockdev(bdev);
> -
>  	mutex_lock_nested(&bdev->bd_mutex, for_part);
>  	if (for_part)
>  		bdev->bd_part_count--;
> @@ -1589,6 +1579,16 @@ void blkdev_put(struct block_device *bdev, fmode_t mode)
>  {
>  	struct gendisk *disk = bdev->bd_disk;
>  
> +	/*
> +	 * Sync early if it looks like we're the last one.  If someone else
> +	 * opens the block device between now and the decrement of bd_openers
> +	 * then we did a sync that we didn't need to, but that's not the end
> +	 * of the world and we want to avoid long (could be several minute)
> +	 * syncs while holding the mutex.
> +	 */
> +	if (bdev->bd_openers == 1)
> +		sync_blockdev(bdev);
> +

The early sync on disk is killed in case of closing partition, but there
shouldn't much dirty data on disk, so looks fine:

Reviewed-by: Ming Lei <ming.lei@redhat.com>

--
Ming


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/8] block: split __blkdev_get
  2021-05-19  9:21   ` Ming Lei
@ 2021-05-19 13:23     ` Christoph Hellwig
  0 siblings, 0 replies; 12+ messages in thread
From: Christoph Hellwig @ 2021-05-19 13:23 UTC (permalink / raw)
  To: Ming Lei
  Cc: Christoph Hellwig, Jens Axboe, Song Liu, Konrad Rzeszutek Wilk,
	Roger Pau Monné,
	Minchan Kim, Nitin Gupta, Stefan Haberland, Jan Hoeppner,
	linux-block, linux-raid, linux-s390, linux-scsi

On Wed, May 19, 2021 at 05:21:05PM +0800, Ming Lei wrote:
> Nice cleanup, now the blkdev get code becomes more readable than before:

Note that this will need a rebase on top of the partition rescan fix.
I'll send that out once Jens has merged the fix.

^ permalink raw reply	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2021-05-19 13:23 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2021-05-12  6:18 move bd_mutex to the gendisk (resend) Christoph Hellwig
2021-05-12  6:18 ` [PATCH 1/8] block: split __blkdev_get Christoph Hellwig
2021-05-19  9:21   ` Ming Lei
2021-05-19 13:23     ` Christoph Hellwig
2021-05-12  6:18 ` [PATCH 2/8] block: move sync_blockdev from __blkdev_put to blkdev_put Christoph Hellwig
2021-05-19  9:32   ` Ming Lei
2021-05-12  6:18 ` [PATCH 3/8] block: move bd_mutex to struct gendisk Christoph Hellwig
2021-05-12  6:18 ` [PATCH 4/8] block: move adjusting bd_part_count out of __blkdev_get Christoph Hellwig
2021-05-12  6:18 ` [PATCH 5/8] block: split __blkdev_put Christoph Hellwig
2021-05-12  6:18 ` [PATCH 6/8] block: move bd_part_count to struct gendisk Christoph Hellwig
2021-05-12  6:18 ` [PATCH 7/8] block: factor out a part_devt helper Christoph Hellwig
2021-05-12  6:18 ` [PATCH 8/8] block: remove bdget_disk Christoph Hellwig

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).