All of lore.kernel.org
 help / color / mirror / Atom feed
From: Christoph Hellwig <hch@lst.de>
To: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	David Sterba <dsterba@suse.com>
Cc: Damien Le Moal <damien.lemoal@wdc.com>,
	Naohiro Aota <naohiro.aota@wdc.com>,
	Johannes Thumshirn <johannes.thumshirn@wdc.com>,
	Qu Wenruo <wqu@suse.com>, Jens Axboe <axboe@kernel.dk>,
	"Darrick J. Wong" <djwong@kernel.org>,
	linux-block@vger.kernel.org, linux-btrfs@vger.kernel.org,
	linux-fsdevel@vger.kernel.org
Subject: [PATCH 32/34] btrfs: calculate file system wide queue limit for zoned mode
Date: Sat, 21 Jan 2023 07:50:29 +0100	[thread overview]
Message-ID: <20230121065031.1139353-33-hch@lst.de> (raw)
In-Reply-To: <20230121065031.1139353-1-hch@lst.de>

To be able to split a write into properly sized zone append commands,
we need a queue_limits structure that contains the least common
denominator suitable for all devices.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Josef Bacik <josef@toxicpanda.com>
Reviewed-by: Johannes Thumshirn <johannes.thumshirn@wdc.com>
---
 fs/btrfs/fs.h    |  5 ++++-
 fs/btrfs/zoned.c | 52 ++++++++++++++++++++++++------------------------
 fs/btrfs/zoned.h |  1 -
 3 files changed, 30 insertions(+), 28 deletions(-)

diff --git a/fs/btrfs/fs.h b/fs/btrfs/fs.h
index 3d8156fc8523f2..4c477eae689148 100644
--- a/fs/btrfs/fs.h
+++ b/fs/btrfs/fs.h
@@ -3,6 +3,7 @@
 #ifndef BTRFS_FS_H
 #define BTRFS_FS_H
 
+#include <linux/blkdev.h>
 #include <linux/fs.h>
 #include <linux/btrfs_tree.h>
 #include <linux/sizes.h>
@@ -748,8 +749,10 @@ struct btrfs_fs_info {
 	 */
 	u64 zone_size;
 
-	/* Max size to emit ZONE_APPEND write command */
+	/* Constraints for ZONE_APPEND commands: */
+	struct queue_limits limits;
 	u64 max_zone_append_size;
+
 	struct mutex zoned_meta_io_lock;
 	spinlock_t treelog_bg_lock;
 	u64 treelog_bg;
diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c
index 5bf67c3c9f846f..d6a8f8a07d7581 100644
--- a/fs/btrfs/zoned.c
+++ b/fs/btrfs/zoned.c
@@ -421,25 +421,6 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device, bool populate_cache)
 	nr_sectors = bdev_nr_sectors(bdev);
 	zone_info->zone_size_shift = ilog2(zone_info->zone_size);
 	zone_info->nr_zones = nr_sectors >> ilog2(zone_sectors);
-	/*
-	 * We limit max_zone_append_size also by max_segments *
-	 * PAGE_SIZE. Technically, we can have multiple pages per segment. But,
-	 * since btrfs adds the pages one by one to a bio, and btrfs cannot
-	 * increase the metadata reservation even if it increases the number of
-	 * extents, it is safe to stick with the limit.
-	 *
-	 * With the zoned emulation, we can have non-zoned device on the zoned
-	 * mode. In this case, we don't have a valid max zone append size. So,
-	 * use max_segments * PAGE_SIZE as the pseudo max_zone_append_size.
-	 */
-	if (bdev_is_zoned(bdev)) {
-		zone_info->max_zone_append_size = min_t(u64,
-			(u64)bdev_max_zone_append_sectors(bdev) << SECTOR_SHIFT,
-			(u64)bdev_max_segments(bdev) << PAGE_SHIFT);
-	} else {
-		zone_info->max_zone_append_size =
-			(u64)bdev_max_segments(bdev) << PAGE_SHIFT;
-	}
 	if (!IS_ALIGNED(nr_sectors, zone_sectors))
 		zone_info->nr_zones++;
 
@@ -719,9 +700,9 @@ static int btrfs_check_for_zoned_device(struct btrfs_fs_info *fs_info)
 
 int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 {
+	struct queue_limits *lim = &fs_info->limits;
 	struct btrfs_device *device;
 	u64 zone_size = 0;
-	u64 max_zone_append_size = 0;
 	int ret;
 
 	/*
@@ -731,6 +712,8 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 	if (!btrfs_fs_incompat(fs_info, ZONED))
 		return btrfs_check_for_zoned_device(fs_info);
 
+	blk_set_stacking_limits(lim);
+
 	list_for_each_entry(device, &fs_info->fs_devices->devices, dev_list) {
 		struct btrfs_zoned_device_info *zone_info = device->zone_info;
 
@@ -745,10 +728,17 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 				  zone_info->zone_size, zone_size);
 			return -EINVAL;
 		}
-		if (!max_zone_append_size ||
-		    (zone_info->max_zone_append_size &&
-		     zone_info->max_zone_append_size < max_zone_append_size))
-			max_zone_append_size = zone_info->max_zone_append_size;
+
+		/*
+		 * With the zoned emulation, we can have non-zoned device on the
+		 * zoned mode. In this case, we don't have a valid max zone
+		 * append size.
+		 */
+		if (bdev_is_zoned(device->bdev)) {
+			blk_stack_limits(lim,
+					 &bdev_get_queue(device->bdev)->limits,
+					 0);
+		}
 	}
 
 	/*
@@ -769,8 +759,18 @@ int btrfs_check_zoned_mode(struct btrfs_fs_info *fs_info)
 	}
 
 	fs_info->zone_size = zone_size;
-	fs_info->max_zone_append_size = ALIGN_DOWN(max_zone_append_size,
-						   fs_info->sectorsize);
+	/*
+	 * Also limit max_zone_append_size by max_segments * PAGE_SIZE.
+	 * Technically, we can have multiple pages per segment. But,
+	 * since btrfs adds the pages one by one to a bio, and btrfs cannot
+	 * increase the metadata reservation even if it increases the number of
+	 * extents, it is safe to stick with the limit.
+	 */
+	fs_info->max_zone_append_size = ALIGN_DOWN(
+		min3((u64)lim->max_zone_append_sectors << SECTOR_SHIFT,
+		     (u64)lim->max_sectors << SECTOR_SHIFT,
+		     (u64)lim->max_segments << PAGE_SHIFT),
+		fs_info->sectorsize);
 	fs_info->fs_devices->chunk_alloc_policy = BTRFS_CHUNK_ALLOC_ZONED;
 	if (fs_info->max_zone_append_size < fs_info->max_extent_size)
 		fs_info->max_extent_size = fs_info->max_zone_append_size;
diff --git a/fs/btrfs/zoned.h b/fs/btrfs/zoned.h
index bc93a740e7cf34..f25f332b772859 100644
--- a/fs/btrfs/zoned.h
+++ b/fs/btrfs/zoned.h
@@ -20,7 +20,6 @@ struct btrfs_zoned_device_info {
 	 */
 	u64 zone_size;
 	u8  zone_size_shift;
-	u64 max_zone_append_size;
 	u32 nr_zones;
 	unsigned int max_active_zones;
 	atomic_t active_zones_left;
-- 
2.39.0


  parent reply	other threads:[~2023-01-21  6:52 UTC|newest]

Thread overview: 80+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-01-21  6:49 consolidate btrfs checksumming, repair and bio splitting v4 Christoph Hellwig
2023-01-21  6:49 ` [PATCH 01/34] block: export bio_split_rw Christoph Hellwig
2023-01-21  6:49 ` [PATCH 02/34] btrfs: better document struct btrfs_bio Christoph Hellwig
2023-01-22 10:19   ` Anand Jain
2023-01-23 15:25   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 03/34] btrfs: add a btrfs_inode pointer to " Christoph Hellwig
2023-01-22 10:20   ` Anand Jain
2023-01-23 15:26   ` Johannes Thumshirn
2023-01-23 15:47   ` Johannes Thumshirn
2023-03-07  1:44   ` Qu Wenruo
2023-03-07 14:41     ` Christoph Hellwig
2023-03-07 22:21       ` Qu Wenruo
2023-03-08  6:04         ` Qu Wenruo
2023-03-08 14:28           ` Christoph Hellwig
2023-03-09  0:08             ` Qu Wenruo
2023-03-09  9:31               ` Christoph Hellwig
2023-03-09 10:32                 ` Qu Wenruo
2023-03-09 15:18                   ` Christoph Hellwig
2023-01-21  6:50 ` [PATCH 04/34] btrfs: remove the direct I/O read checksum lookup optimization Christoph Hellwig
2023-01-23 15:59   ` Johannes Thumshirn
2023-01-24 14:55   ` Anand Jain
2023-01-24 19:55     ` Christoph Hellwig
2023-01-25  7:42       ` Anand Jain
2023-01-21  6:50 ` [PATCH 05/34] btrfs: simplify btrfs_lookup_bio_sums Christoph Hellwig
2023-01-23 16:06   ` Johannes Thumshirn
2023-01-24 15:16   ` Anand Jain
2023-01-21  6:50 ` [PATCH 06/34] btrfs: slightly refactor btrfs_submit_bio Christoph Hellwig
2023-01-23 16:13   ` Johannes Thumshirn
2023-01-24 15:27   ` Anand Jain
2023-01-21  6:50 ` [PATCH 07/34] btrfs: save the bio iter for checksum validation in common code Christoph Hellwig
2023-01-23 16:18   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 08/34] btrfs: pre-load data checksum for reads in btrfs_submit_bio Christoph Hellwig
2023-01-23 16:32   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 09/34] btrfs: add a btrfs_data_csum_ok helper Christoph Hellwig
2023-01-23 16:36   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 10/34] btrfs: handle checksum validation and repair at the storage layer Christoph Hellwig
2023-01-23 16:39   ` Johannes Thumshirn
2023-01-24  6:47     ` Christoph Hellwig
2023-01-24  8:16       ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 11/34] btrfs: remove btrfs_bio_free_csum Christoph Hellwig
2023-01-23 16:41   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 12/34] btrfs: remove btrfs_bio_for_each_sector Christoph Hellwig
2023-01-23 16:42   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 13/34] btrfs: remove now unused checksumming helpers Christoph Hellwig
2023-01-23 16:49   ` Johannes Thumshirn
2023-01-23 16:53     ` Christoph Hellwig
2023-01-24  8:33       ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 14/34] btrfs: remove the device field in struct btrfs_bio Christoph Hellwig
2023-01-24 11:01   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 15/34] btrfs: remove the io_failure_record infrastructure Christoph Hellwig
2023-01-24 11:04   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 16/34] btrfs: rename the iter field in struct btrfs_bio Christoph Hellwig
2023-01-24 11:09   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 17/34] btrfs: remove the is_metadata flag " Christoph Hellwig
2023-01-24 11:13   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 18/34] btrfs: remove the submit_bio_start helpers Christoph Hellwig
2023-01-24 11:49   ` Johannes Thumshirn
2023-01-21  6:50 ` [PATCH 19/34] btrfs: simplify the btrfs_csum_one_bio calling convention Christoph Hellwig
2023-01-21  6:50 ` [PATCH 20/34] btrfs: handle checksum generation in the storage layer Christoph Hellwig
2023-01-21  6:50 ` [PATCH 21/34] btrfs: handle recording of zoned writes " Christoph Hellwig
2023-01-21  6:50 ` [PATCH 22/34] btrfs: support cloned bios in btree_csum_one_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 23/34] btrfs: allow btrfs_submit_bio to split bios Christoph Hellwig
2023-01-25 21:51   ` Josef Bacik
2023-01-26  5:21     ` Christoph Hellwig
2023-01-26 17:43       ` Josef Bacik
2023-01-26 17:46         ` Christoph Hellwig
2023-01-26 18:33           ` David Sterba
2023-01-27  7:02             ` test not in the auto group, was: " Christoph Hellwig
2023-01-21  6:50 ` [PATCH 24/34] btrfs: pass the iomap bio to btrfs_submit_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 25/34] btrfs: remove stripe boundary calculation for buffered I/O Christoph Hellwig
2023-01-21  6:50 ` [PATCH 26/34] btrfs: remove stripe boundary calculation for compressed I/O Christoph Hellwig
2023-01-21  6:50 ` [PATCH 27/34] btrfs: remove stripe boundary calculation for encoded I/O Christoph Hellwig
2023-01-21  6:50 ` [PATCH 28/34] btrfs: remove struct btrfs_io_geometry Christoph Hellwig
2023-01-21  6:50 ` [PATCH 29/34] btrfs: remove submit_encoded_read_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 30/34] btrfs: remove the fs_info argument to btrfs_submit_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 31/34] btrfs: remove now spurious bio submission helpers Christoph Hellwig
2023-01-21  6:50 ` Christoph Hellwig [this message]
2023-01-21  6:50 ` [PATCH 33/34] btrfs: split zone append bios in btrfs_submit_bio Christoph Hellwig
2023-01-21  6:50 ` [PATCH 34/34] iomap: remove IOMAP_F_ZONE_APPEND Christoph Hellwig
2023-01-24 13:22 ` consolidate btrfs checksumming, repair and bio splitting v4 Christoph Hellwig

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230121065031.1139353-33-hch@lst.de \
    --to=hch@lst.de \
    --cc=axboe@kernel.dk \
    --cc=clm@fb.com \
    --cc=damien.lemoal@wdc.com \
    --cc=djwong@kernel.org \
    --cc=dsterba@suse.com \
    --cc=johannes.thumshirn@wdc.com \
    --cc=josef@toxicpanda.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=naohiro.aota@wdc.com \
    --cc=wqu@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.