All of lore.kernel.org
 help / color / mirror / Atom feed
From: Damien Le Moal <dlemoal@kernel.org>
To: linux-block@vger.kernel.org, Jens Axboe <axboe@kernel.dk>,
	linux-scsi@vger.kernel.org,
	"Martin K . Petersen" <martin.petersen@oracle.com>,
	dm-devel@lists.linux.dev, Mike Snitzer <snitzer@redhat.com>
Cc: Christoph Hellwig <hch@lst.de>
Subject: [PATCH v2 08/28] block: Use a mempool to allocate zone write plugs
Date: Mon, 25 Mar 2024 13:44:32 +0900	[thread overview]
Message-ID: <20240325044452.3125418-9-dlemoal@kernel.org> (raw)
In-Reply-To: <20240325044452.3125418-1-dlemoal@kernel.org>

Allocating zone write plugs using a struct kmem_cache does not guarantee
that enough write plugs can be allocated to simultaneously write up to
the maximum number of active zones of a zoned block device.

Avoid any issue with memory allocation by using a mempool with a size
equal to the disk's maximum number of open zones or maximum number of
active zones, whichever is larger. For zoned devices that do not have
open or active zone limits, a default size of 128 is used for the
mempool. If a change to the zone limits is detected, the mempool is
resized in blk_revalidate_disk_zones().

Signed-off-by: Damien Le Moal <dlemoal@kernel.org>
---
 block/blk-zoned.c      | 62 ++++++++++++++++++++++++++++++------------
 include/linux/blkdev.h |  3 ++
 2 files changed, 47 insertions(+), 18 deletions(-)

diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 39e66d64ea55..4e93293b1233 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -568,13 +568,14 @@ static struct blk_zone_wplug *disk_get_zone_wplug(struct gendisk *disk,
 	return zwplug;
 }
 
-static inline void disk_put_zone_wplug(struct blk_zone_wplug *zwplug)
+static inline void disk_put_zone_wplug(struct gendisk *disk,
+				       struct blk_zone_wplug *zwplug)
 {
 	if (atomic_dec_and_test(&zwplug->ref)) {
 		WARN_ON_ONCE(!bio_list_empty(&zwplug->bio_list));
 		WARN_ON_ONCE(!list_empty(&zwplug->err));
 
-		kmem_cache_free(blk_zone_wplugs_cachep, zwplug);
+		mempool_free(zwplug, disk->zone_wplugs_pool);
 	}
 }
 
@@ -599,14 +600,14 @@ static struct blk_zone_wplug *disk_get_zone_wplug_locked(struct gendisk *disk,
 		spin_lock_irqsave(&zwplug->lock, *flags);
 		if (zwplug->flags & BLK_ZONE_WPLUG_FREEING) {
 			spin_unlock_irqrestore(&zwplug->lock, *flags);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 			goto again;
 		}
 		return zwplug;
 	}
 
 	/* Allocate and insert a new zone write plug. */
-	zwplug = kmem_cache_alloc(blk_zone_wplugs_cachep, gfp_mask);
+	zwplug = mempool_alloc(disk->zone_wplugs_pool, gfp_mask);
 	if (!zwplug)
 		return NULL;
 
@@ -629,7 +630,7 @@ static struct blk_zone_wplug *disk_get_zone_wplug_locked(struct gendisk *disk,
 	 */
 	if (!disk_insert_zone_wplug(disk, zwplug)) {
 		spin_unlock_irqrestore(&zwplug->lock, *flags);
-		kmem_cache_free(blk_zone_wplugs_cachep, zwplug);
+		mempool_free(zwplug, disk->zone_wplugs_pool);
 		goto again;
 	}
 
@@ -659,13 +660,14 @@ static inline void blk_zone_wplug_bio_io_error(struct bio *bio)
 	blk_queue_exit(q);
 }
 
-static void disk_zone_wplug_abort(struct blk_zone_wplug *zwplug)
+static void disk_zone_wplug_abort(struct gendisk *disk,
+				  struct blk_zone_wplug *zwplug)
 {
 	struct bio *bio;
 
 	while ((bio = bio_list_pop(&zwplug->bio_list))) {
 		blk_zone_wplug_bio_io_error(bio);
-		disk_put_zone_wplug(zwplug);
+		disk_put_zone_wplug(disk, zwplug);
 	}
 }
 
@@ -681,7 +683,7 @@ static void disk_zone_wplug_abort_unaligned(struct gendisk *disk,
 		if (wp_offset >= zone_capacity ||
 		     bio_offset_from_zone_start(bio) != wp_offset) {
 			blk_zone_wplug_bio_io_error(bio);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 			continue;
 		}
 
@@ -718,7 +720,7 @@ static void disk_zone_wplug_set_wp_offset(struct gendisk *disk,
 
 	/* Update the zone write pointer and abort all plugged BIOs. */
 	zwplug->wp_offset = wp_offset;
-	disk_zone_wplug_abort(zwplug);
+	disk_zone_wplug_abort(disk, zwplug);
 
 	/*
 	 * Updating the write pointer offset puts back the zone
@@ -765,7 +767,7 @@ static bool blk_zone_wplug_handle_reset_or_finish(struct bio *bio,
 	zwplug = disk_get_zone_wplug(disk, bio->bi_iter.bi_sector);
 	if (zwplug) {
 		disk_zone_wplug_set_wp_offset(disk, zwplug, wp_offset);
-		disk_put_zone_wplug(zwplug);
+		disk_put_zone_wplug(disk, zwplug);
 	}
 
 	return false;
@@ -787,7 +789,7 @@ static bool blk_zone_wplug_handle_reset_all(struct bio *bio)
 		zwplug = disk_get_zone_wplug(disk, sector);
 		if (zwplug) {
 			disk_zone_wplug_set_wp_offset(disk, zwplug, 0);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 		}
 	}
 
@@ -1158,7 +1160,7 @@ void blk_zone_write_plug_bio_endio(struct bio *bio)
 	if (bio->bi_bdev->bd_has_submit_bio)
 		disk_zone_wplug_unplug_bio(disk, zwplug);
 
-	disk_put_zone_wplug(zwplug);
+	disk_put_zone_wplug(disk, zwplug);
 }
 
 void blk_zone_write_plug_complete_request(struct request *req)
@@ -1171,7 +1173,7 @@ void blk_zone_write_plug_complete_request(struct request *req)
 
 	disk_zone_wplug_unplug_bio(disk, zwplug);
 
-	disk_put_zone_wplug(zwplug);
+	disk_put_zone_wplug(disk, zwplug);
 }
 
 static void blk_zone_wplug_bio_work(struct work_struct *work)
@@ -1284,7 +1286,7 @@ static void disk_zone_wplug_handle_error(struct gendisk *disk,
 		 * plugged BIOs as otherwise we could endup waiting forever on
 		 * plugged BIOs to complete if there is a queue freeze on-going.
 		 */
-		disk_zone_wplug_abort(zwplug);
+		disk_zone_wplug_abort(disk, zwplug);
 		goto unplug;
 	}
 
@@ -1325,7 +1327,7 @@ static void disk_zone_wplugs_work(struct work_struct *work)
 		spin_unlock_irqrestore(&disk->zone_wplugs_lock, flags);
 
 		disk_zone_wplug_handle_error(disk, zwplug);
-		disk_put_zone_wplug(zwplug);
+		disk_put_zone_wplug(disk, zwplug);
 
 		spin_lock_irqsave(&disk->zone_wplugs_lock, flags);
 	}
@@ -1353,7 +1355,7 @@ static void disk_free_zone_wplugs(struct gendisk *disk)
 					     struct blk_zone_wplug, node);
 			blk_get_zone_wplug(zwplug);
 			disk_remove_zone_wplug(disk, zwplug);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 		}
 	}
 }
@@ -1369,7 +1371,7 @@ void disk_init_zone_resources(struct gendisk *disk)
  * For the size of a disk zone write plug hash table, use the disk maximum
  * open zones and maximum active zones limits, but do not exceed 4KB (512 hlist
  * head entries), that is, 9 bits. For a disk that has no limits, default to
- * 128 zones to hash.
+ * 128 zones for the mempool size and the hash size.
  */
 #define BLK_ZONE_MAX_WPLUG_HASH_BITS		9
 #define BLK_ZONE_DEFAULT_WPLUG_HASH_SIZE	128
@@ -1391,6 +1393,17 @@ static int disk_alloc_zone_resources(struct gendisk *disk,
 	for (i = 0; i < disk_zone_wplugs_hash_size(disk); i++)
 		INIT_HLIST_HEAD(&disk->zone_wplugs_hash[i]);
 
+	disk->zone_wplugs_pool =
+		mempool_create_slab_pool(hash_size, blk_zone_wplugs_cachep);
+	if (!disk->zone_wplugs_pool) {
+		kfree(disk->zone_wplugs_hash);
+		disk->zone_wplugs_hash = NULL;
+		disk->zone_wplugs_hash_bits = 0;
+		return -ENOMEM;
+	}
+
+	disk->zone_wplugs_pool_size = hash_size;
+
 	return 0;
 }
 
@@ -1404,6 +1417,10 @@ void disk_free_zone_resources(struct gendisk *disk)
 	disk->zone_wplugs_hash = NULL;
 	disk->zone_wplugs_hash_bits = 0;
 
+	mempool_destroy(disk->zone_wplugs_pool);
+	disk->zone_wplugs_pool = NULL;
+	disk->zone_wplugs_pool_size = 0;
+
 	kfree(disk->conv_zones_bitmap);
 	disk->conv_zones_bitmap = NULL;
 	kfree(disk->seq_zones_wlock);
@@ -1418,6 +1435,7 @@ static int disk_revalidate_zone_resources(struct gendisk *disk,
 {
 	struct queue_limits *lim = &disk->queue->limits;
 	unsigned int hash_size;
+	int ret;
 
 	hash_size = max(lim->max_open_zones, lim->max_active_zones);
 	if (!hash_size)
@@ -1427,6 +1445,14 @@ static int disk_revalidate_zone_resources(struct gendisk *disk,
 	if (!disk->zone_wplugs_hash)
 		return disk_alloc_zone_resources(disk, hash_size);
 
+	/* Resize the memory pool if needed. */
+	if (disk->zone_wplugs_pool_size != hash_size) {
+		ret = mempool_resize(disk->zone_wplugs_pool, hash_size);
+		if (ret)
+			return ret;
+		disk->zone_wplugs_pool_size = hash_size;
+	}
+
 	return 0;
 }
 
@@ -1526,7 +1552,7 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 			if (!zwplug)
 				return -ENOMEM;
 			spin_unlock_irqrestore(&zwplug->lock, flags);
-			disk_put_zone_wplug(zwplug);
+			disk_put_zone_wplug(disk, zwplug);
 		}
 
 		break;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e9b670be338b..68c60039a7ea 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -25,6 +25,7 @@
 #include <linux/uuid.h>
 #include <linux/xarray.h>
 #include <linux/file.h>
+#include <linux/mempool.h>
 
 struct module;
 struct request_queue;
@@ -194,6 +195,8 @@ struct gendisk {
 	unsigned int		zone_capacity;
 	unsigned long		*conv_zones_bitmap;
 	unsigned long		*seq_zones_wlock;
+	unsigned int		zone_wplugs_pool_size;
+	mempool_t		*zone_wplugs_pool;
 	unsigned int            zone_wplugs_hash_bits;
 	spinlock_t              zone_wplugs_lock;
 	struct hlist_head       *zone_wplugs_hash;
-- 
2.44.0


  parent reply	other threads:[~2024-03-25  4:45 UTC|newest]

Thread overview: 73+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-03-25  4:44 [PATCH v2 00/28] Zone write plugging Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 01/28] block: Restore sector of flush requests Damien Le Moal
2024-03-25 19:30   ` Bart Van Assche
2024-03-26  6:05   ` Christoph Hellwig
2024-03-25  4:44 ` [PATCH v2 02/28] block: Remove req_bio_endio() Damien Le Moal
2024-03-25 19:39   ` Bart Van Assche
2024-03-26  1:54     ` Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 03/28] block: Introduce blk_zone_update_request_bio() Damien Le Moal
2024-03-25 19:52   ` Bart Van Assche
2024-03-25 23:23     ` Damien Le Moal
2024-03-26  6:37       ` Christoph Hellwig
2024-03-26  7:47         ` Damien Le Moal
2024-03-27  7:01   ` Hannes Reinecke
2024-03-25  4:44 ` [PATCH v2 04/28] block: Introduce bio_straddle_zones() and bio_offset_from_zone_start() Damien Le Moal
2024-03-25 19:55   ` Bart Van Assche
2024-03-26  6:39   ` Christoph Hellwig
2024-03-25  4:44 ` [PATCH v2 05/28] block: Allow using bio_attempt_back_merge() internally Damien Le Moal
2024-03-25 20:00   ` Bart Van Assche
2024-03-26  6:39   ` Christoph Hellwig
2024-03-25  4:44 ` [PATCH v2 06/28] block: Remember zone capacity when revalidating zones Damien Le Moal
2024-03-25 21:53   ` Bart Van Assche
2024-03-25 23:20     ` Damien Le Moal
2024-03-26  6:40   ` Christoph Hellwig
2024-03-27  7:05   ` Hannes Reinecke
2024-03-25  4:44 ` [PATCH v2 07/28] block: Introduce zone write plugging Damien Le Moal
2024-03-25 21:53   ` Bart Van Assche
2024-03-26  3:12     ` Damien Le Moal
2024-03-26  6:51       ` Christoph Hellwig
2024-03-26 17:23       ` Bart Van Assche
2024-03-27  7:18   ` Hannes Reinecke
2024-03-25  4:44 ` Damien Le Moal [this message]
2024-03-27  7:19   ` [PATCH v2 08/28] block: Use a mempool to allocate zone write plugs Hannes Reinecke
2024-03-27  7:22     ` Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 09/28] block: Fake max open zones limit when there is no limit Damien Le Moal
2024-03-26  6:57   ` Christoph Hellwig
2024-03-27  7:21   ` Hannes Reinecke
2024-03-25  4:44 ` [PATCH v2 10/28] block: Allow zero value of max_zone_append_sectors queue limit Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 11/28] block: Implement zone append emulation Damien Le Moal
2024-03-27  7:28   ` Hannes Reinecke
2024-03-25  4:44 ` [PATCH v2 12/28] block: Allow BIO-based drivers to use blk_revalidate_disk_zones() Damien Le Moal
2024-03-26  7:08   ` Christoph Hellwig
2024-03-26  8:12     ` Damien Le Moal
2024-03-27  7:29   ` Hannes Reinecke
2024-03-25  4:44 ` [PATCH v2 13/28] dm: Use the block layer zone append emulation Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 14/28] scsi: sd: " Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 15/28] ublk_drv: Do not request ELEVATOR_F_ZBD_SEQ_WRITE elevator feature Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 16/28] null_blk: " Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 17/28] null_blk: Introduce zone_append_max_sectors attribute Damien Le Moal
2024-03-27  7:31   ` Hannes Reinecke
2024-03-25  4:44 ` [PATCH v2 18/28] null_blk: Introduce fua attribute Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 19/28] nvmet: zns: Do not reference the gendisk conv_zones_bitmap Damien Le Moal
2024-03-26  6:45   ` Christoph Hellwig
2024-03-25  4:44 ` [PATCH v2 20/28] block: Remove BLK_STS_ZONE_RESOURCE Damien Le Moal
2024-03-26  6:45   ` Christoph Hellwig
2024-03-25  4:44 ` [PATCH v2 21/28] block: Simplify blk_revalidate_disk_zones() interface Damien Le Moal
2024-03-26  6:45   ` Christoph Hellwig
2024-03-25  4:44 ` [PATCH v2 22/28] block: mq-deadline: Remove support for zone write locking Damien Le Moal
2024-03-25 22:13   ` Bart Van Assche
2024-03-25  4:44 ` [PATCH v2 23/28] block: Remove elevator required features Damien Le Moal
2024-03-26  6:45   ` Christoph Hellwig
2024-03-25  4:44 ` [PATCH v2 24/28] block: Do not check zone type in blk_check_zone_append() Damien Le Moal
2024-03-26  6:46   ` Christoph Hellwig
2024-03-25  4:44 ` [PATCH v2 25/28] block: Move zone related debugfs attribute to blk-zoned.c Damien Le Moal
2024-03-25 22:20   ` Bart Van Assche
2024-03-25 23:17     ` Damien Le Moal
2024-03-25  4:44 ` [PATCH v2 26/28] block: Remove zone write locking Damien Le Moal
2024-03-25 22:27   ` Bart Van Assche
2024-03-27  7:32   ` Hannes Reinecke
2024-03-25  4:44 ` [PATCH v2 27/28] block: Do not force select mq-deadline with CONFIG_BLK_DEV_ZONED Damien Le Moal
2024-03-25 22:29   ` Bart Van Assche
2024-03-27  7:33   ` Hannes Reinecke
2024-03-25  4:44 ` [PATCH v2 28/28] block: Do not special-case plugging of zone write operations Damien Le Moal
2024-03-26 14:11 [PATCH v2 08/28] block: Use a mempool to allocate zone write plugs kernel test robot

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240325044452.3125418-9-dlemoal@kernel.org \
    --to=dlemoal@kernel.org \
    --cc=axboe@kernel.dk \
    --cc=dm-devel@lists.linux.dev \
    --cc=hch@lst.de \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=snitzer@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link.

Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.