All of lore.kernel.org
 help / color / mirror / Atom feed
From: Pankaj Raghav <p.raghav@samsung.com>
To: jaegeuk@kernel.org, hare@suse.de, dsterba@suse.com,
	axboe@kernel.dk, hch@lst.de, damien.lemoal@opensource.wdc.com,
	snitzer@kernel.org
Cc: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	bvanassche@acm.org, linux-fsdevel@vger.kernel.org,
	matias.bjorling@wdc.com, Jens Axboe <axboe@fb.com>,
	gost.dev@samsung.com, jonathan.derrick@linux.dev,
	jiangbo.365@bytedance.com, linux-nvme@lists.infradead.org,
	dm-devel@redhat.com, Naohiro Aota <naohiro.aota@wdc.com>,
	linux-kernel@vger.kernel.org, Johannes Thumshirn <jth@kernel.org>,
	Sagi Grimberg <sagi@grimberg.me>,
	Alasdair Kergon <agk@redhat.com>,
	linux-block@vger.kernel.org, Chaitanya Kulkarni <kch@nvidia.com>,
	Keith Busch <kbusch@kernel.org>,
	linux-btrfs@vger.kernel.org, Pankaj Raghav <p.raghav@samsung.com>,
	Luis Chamberlain <mcgrof@kernel.org>
Subject: [PATCH v3 02/11] block: allow blk-zoned devices to have non-power-of-2 zone size
Date: Fri,  6 May 2022 10:10:56 +0200	[thread overview]
Message-ID: <20220506081105.29134-3-p.raghav@samsung.com> (raw)
In-Reply-To: <20220506081105.29134-1-p.raghav@samsung.com>

Checking whether a given sector is aligned to a zone is a common
operation for zoned devices. Add the blk_queue_is_zone_start() helper
to perform this check instead of open-coding it everywhere.

Convert the calculations on zone size to be generic instead of relying on
power_of_2 based logic in the block layer using the helpers wherever
possible.

The only hot path affected by this change for power_of_2 zoned devices
is in blk_check_zone_append(), but the blk_queue_is_zone_start() helper
optimizes the calculation for po2 zone sizes. Note that the append
path cannot be accessed by direct raw access to the block device but only
through a filesystem abstraction.

Finally, allow non power_of_2 zoned devices provided that their zone
capacity and zone size are equal. The main motivation for allowing non
power_of_2 zoned devices is to remove the unmapped LBAs between zcap and
zsze for devices that cannot have a power_of_2 zcap.

Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
 block/blk-core.c       |  3 +--
 block/blk-zoned.c      | 27 +++++++++++++++++++++------
 include/linux/blkdev.h | 22 ++++++++++++++++++++++
 3 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index f305cb66c..b7051b7ea 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -634,8 +634,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 		return BLK_STS_NOTSUPP;
 
 	/* The bio sector must point to the start of a sequential zone */
-	if (pos & (blk_queue_zone_sectors(q) - 1) ||
-	    !blk_queue_zone_is_seq(q, pos))
+	if (!blk_queue_is_zone_start(q, pos) || !blk_queue_zone_is_seq(q, pos))
 		return BLK_STS_IOERR;
 
 	/*
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 140230134..cfc2fb804 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -289,10 +289,10 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 		return -EINVAL;
 
 	/* Check alignment (handle eventual smaller last zone) */
-	if (sector & (zone_sectors - 1))
+	if (!blk_queue_is_zone_start(q, sector))
 		return -EINVAL;
 
-	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
+	if (!blk_queue_is_zone_start(q, nr_sectors) && end_sector != capacity)
 		return -EINVAL;
 
 	/*
@@ -490,14 +490,29 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 	 * smaller last zone.
 	 */
 	if (zone->start == 0) {
-		if (zone->len == 0 || !is_power_of_2(zone->len)) {
-			pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
-				disk->disk_name, zone->len);
+		if (zone->len == 0) {
+			pr_warn("%s: Invalid zone size",
+				disk->disk_name);
+			return -ENODEV;
+		}
+
+		/*
+		 * Don't allow zoned device with non power_of_2 zone size with
+		 * zone capacity less than zone size.
+		 */
+		if (!is_power_of_2(zone->len) &&
+		    zone->capacity < zone->len) {
+			pr_warn("%s: Invalid zoned size with non power of 2 zone size and zone capacity < zone size",
+				disk->disk_name);
 			return -ENODEV;
 		}
 
 		args->zone_sectors = zone->len;
-		args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
+		/*
+		 * Division is used to calculate nr_zones for both power_of_2
+		 * and non power_of_2 zone sizes as it is not in the hot path.
+		 */
+		args->nr_zones = div64_u64(capacity + zone->len - 1, zone->len);
 	} else if (zone->start + args->zone_sectors < capacity) {
 		if (zone->len != args->zone_sectors) {
 			pr_warn("%s: Invalid zoned device with non constant zone size\n",
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 22fe512ee..32d7bd7b1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -686,6 +686,22 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 	return div64_u64(sector, zone_sectors);
 }
 
+static inline bool blk_queue_is_zone_start(struct request_queue *q, sector_t sec)
+{
+	sector_t zone_sectors = blk_queue_zone_sectors(q);
+	u64 remainder = 0;
+
+	if (!blk_queue_is_zoned(q))
+		return false;
+
+	if (is_power_of_2(zone_sectors))
+		return IS_ALIGNED(sec, zone_sectors);
+
+	div64_u64_rem(sec, zone_sectors, &remainder);
+	/* if there is a remainder, then the sector is not aligned */
+	return remainder == 0;
+}
+
 static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 					 sector_t sector)
 {
@@ -732,6 +748,12 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 {
 	return 0;
 }
+
+static inline bool blk_queue_is_zone_start(struct request_queue *q, sector_t sec)
+{
+	return false;
+}
+
 static inline unsigned int queue_max_open_zones(const struct request_queue *q)
 {
 	return 0;
-- 
2.25.1


WARNING: multiple messages have this Message-ID (diff)
From: Pankaj Raghav <p.raghav@samsung.com>
To: jaegeuk@kernel.org, hare@suse.de, dsterba@suse.com,
	axboe@kernel.dk, hch@lst.de, damien.lemoal@opensource.wdc.com,
	snitzer@kernel.org
Cc: jiangbo.365@bytedance.com, linux-nvme@lists.infradead.org,
	Chris Mason <clm@fb.com>,
	dm-devel@redhat.com, Alasdair Kergon <agk@redhat.com>,
	Naohiro Aota <naohiro.aota@wdc.com>,
	bvanassche@acm.org, gost.dev@samsung.com,
	jonathan.derrick@linux.dev, Pankaj Raghav <p.raghav@samsung.com>,
	Chaitanya Kulkarni <kch@nvidia.com>,
	Josef Bacik <josef@toxicpanda.com>,
	linux-block@vger.kernel.org, Keith Busch <kbusch@kernel.org>,
	matias.bjorling@wdc.com, Sagi Grimberg <sagi@grimberg.me>,
	Jens Axboe <axboe@fb.com>,
	linux-kernel@vger.kernel.org,
	Luis Chamberlain <mcgrof@kernel.org>,
	linux-fsdevel@vger.kernel.org,
	Johannes Thumshirn <jth@kernel.org>,
	linux-btrfs@vger.kernel.org
Subject: [dm-devel] [PATCH v3 02/11] block: allow blk-zoned devices to have non-power-of-2 zone size
Date: Fri,  6 May 2022 10:10:56 +0200	[thread overview]
Message-ID: <20220506081105.29134-3-p.raghav@samsung.com> (raw)
In-Reply-To: <20220506081105.29134-1-p.raghav@samsung.com>

Checking whether a given sector is aligned to a zone is a common
operation for zoned devices. Add the blk_queue_is_zone_start() helper
to perform this check instead of open-coding it everywhere.

Convert the calculations on zone size to be generic instead of relying on
power_of_2 based logic in the block layer using the helpers wherever
possible.

The only hot path affected by this change for power_of_2 zoned devices
is in blk_check_zone_append(), but the blk_queue_is_zone_start() helper
optimizes the calculation for po2 zone sizes. Note that the append
path cannot be accessed by direct raw access to the block device but only
through a filesystem abstraction.

Finally, allow non power_of_2 zoned devices provided that their zone
capacity and zone size are equal. The main motivation for allowing non
power_of_2 zoned devices is to remove the unmapped LBAs between zcap and
zsze for devices that cannot have a power_of_2 zcap.

Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Pankaj Raghav <p.raghav@samsung.com>
---
 block/blk-core.c       |  3 +--
 block/blk-zoned.c      | 27 +++++++++++++++++++++------
 include/linux/blkdev.h | 22 ++++++++++++++++++++++
 3 files changed, 44 insertions(+), 8 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index f305cb66c..b7051b7ea 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -634,8 +634,7 @@ static inline blk_status_t blk_check_zone_append(struct request_queue *q,
 		return BLK_STS_NOTSUPP;
 
 	/* The bio sector must point to the start of a sequential zone */
-	if (pos & (blk_queue_zone_sectors(q) - 1) ||
-	    !blk_queue_zone_is_seq(q, pos))
+	if (!blk_queue_is_zone_start(q, pos) || !blk_queue_zone_is_seq(q, pos))
 		return BLK_STS_IOERR;
 
 	/*
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 140230134..cfc2fb804 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -289,10 +289,10 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
 		return -EINVAL;
 
 	/* Check alignment (handle eventual smaller last zone) */
-	if (sector & (zone_sectors - 1))
+	if (!blk_queue_is_zone_start(q, sector))
 		return -EINVAL;
 
-	if ((nr_sectors & (zone_sectors - 1)) && end_sector != capacity)
+	if (!blk_queue_is_zone_start(q, nr_sectors) && end_sector != capacity)
 		return -EINVAL;
 
 	/*
@@ -490,14 +490,29 @@ static int blk_revalidate_zone_cb(struct blk_zone *zone, unsigned int idx,
 	 * smaller last zone.
 	 */
 	if (zone->start == 0) {
-		if (zone->len == 0 || !is_power_of_2(zone->len)) {
-			pr_warn("%s: Invalid zoned device with non power of two zone size (%llu)\n",
-				disk->disk_name, zone->len);
+		if (zone->len == 0) {
+			pr_warn("%s: Invalid zone size",
+				disk->disk_name);
+			return -ENODEV;
+		}
+
+		/*
+		 * Don't allow zoned device with non power_of_2 zone size with
+		 * zone capacity less than zone size.
+		 */
+		if (!is_power_of_2(zone->len) &&
+		    zone->capacity < zone->len) {
+			pr_warn("%s: Invalid zoned size with non power of 2 zone size and zone capacity < zone size",
+				disk->disk_name);
 			return -ENODEV;
 		}
 
 		args->zone_sectors = zone->len;
-		args->nr_zones = (capacity + zone->len - 1) >> ilog2(zone->len);
+		/*
+		 * Division is used to calculate nr_zones for both power_of_2
+		 * and non power_of_2 zone sizes as it is not in the hot path.
+		 */
+		args->nr_zones = div64_u64(capacity + zone->len - 1, zone->len);
 	} else if (zone->start + args->zone_sectors < capacity) {
 		if (zone->len != args->zone_sectors) {
 			pr_warn("%s: Invalid zoned device with non constant zone size\n",
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 22fe512ee..32d7bd7b1 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -686,6 +686,22 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 	return div64_u64(sector, zone_sectors);
 }
 
+static inline bool blk_queue_is_zone_start(struct request_queue *q, sector_t sec)
+{
+	sector_t zone_sectors = blk_queue_zone_sectors(q);
+	u64 remainder = 0;
+
+	if (!blk_queue_is_zoned(q))
+		return false;
+
+	if (is_power_of_2(zone_sectors))
+		return IS_ALIGNED(sec, zone_sectors);
+
+	div64_u64_rem(sec, zone_sectors, &remainder);
+	/* if there is a remainder, then the sector is not aligned */
+	return remainder == 0;
+}
+
 static inline bool blk_queue_zone_is_seq(struct request_queue *q,
 					 sector_t sector)
 {
@@ -732,6 +748,12 @@ static inline unsigned int blk_queue_zone_no(struct request_queue *q,
 {
 	return 0;
 }
+
+static inline bool blk_queue_is_zone_start(struct request_queue *q, sector_t sec)
+{
+	return false;
+}
+
 static inline unsigned int queue_max_open_zones(const struct request_queue *q)
 {
 	return 0;
-- 
2.25.1

--
dm-devel mailing list
dm-devel@redhat.com
https://listman.redhat.com/mailman/listinfo/dm-devel


  parent reply	other threads:[~2022-05-06  8:11 UTC|newest]

Thread overview: 52+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
     [not found] <CGME20220506081106eucas1p181e83ef352eb8bfb1752bee0cf84020f@eucas1p1.samsung.com>
2022-05-06  8:10 ` [PATCH v3 00/11] support non power of 2 zoned devices Pankaj Raghav
2022-05-06  8:10   ` [dm-devel] " Pankaj Raghav
     [not found]   ` <CGME20220506081107eucas1p1070e00b208e00090c235017435be1593@eucas1p1.samsung.com>
2022-05-06  8:10     ` [PATCH v3 01/11] block: make blkdev_nr_zones and blk_queue_zone_no generic for npo2 zsze Pankaj Raghav
2022-05-06  8:10       ` [dm-devel] " Pankaj Raghav
     [not found]   ` <CGME20220506081108eucas1p2ca72ccafb05dfdcc5b8ba9393da1ce60@eucas1p2.samsung.com>
2022-05-06  8:10     ` Pankaj Raghav [this message]
2022-05-06  8:10       ` [dm-devel] [PATCH v3 02/11] block: allow blk-zoned devices to have non-power-of-2 zone size Pankaj Raghav
     [not found]   ` <CGME20220506081109eucas1p26bbb68a1740b1af923ed862a93112780@eucas1p2.samsung.com>
2022-05-06  8:10     ` [PATCH v3 03/11] nvme: zns: Allow ZNS drives that have non-power_of_2 " Pankaj Raghav
2022-05-06  8:10       ` [dm-devel] " Pankaj Raghav
     [not found]   ` <CGME20220506081110eucas1p1b6c624ddca1c41b9838bb5b85f8ca5ff@eucas1p1.samsung.com>
2022-05-06  8:10     ` [PATCH v3 04/11] nvmet: Allow ZNS target to support non-power_of_2 zone sizes Pankaj Raghav
2022-05-06  8:10       ` [dm-devel] " Pankaj Raghav
     [not found]   ` <CGME20220506081111eucas1p11e4dd5a89ce49939bbea57433cea046f@eucas1p1.samsung.com>
2022-05-06  8:10     ` [PATCH v3 05/11] btrfs: zoned: Cache superblock location in btrfs_zoned_device_info Pankaj Raghav
2022-05-06  8:10       ` [dm-devel] " Pankaj Raghav
     [not found]   ` <CGME20220506081112eucas1p2f6116cb713749c259a6da533df9c2505@eucas1p2.samsung.com>
2022-05-06  8:11     ` [PATCH v3 06/11] btrfs: zoned: Make sb_zone_number function non power of 2 compatible Pankaj Raghav
2022-05-06  8:11       ` [dm-devel] " Pankaj Raghav
     [not found]   ` <CGME20220506081113eucas1p25deb73a4b7898476d2e8e3d35b16f879@eucas1p2.samsung.com>
2022-05-06  8:11     ` [PATCH v3 07/11] btrfs: zoned: use generic btrfs zone helpers to support npo2 zoned devices Pankaj Raghav
2022-05-06  8:11       ` [dm-devel] " Pankaj Raghav
     [not found]   ` <CGME20220506081114eucas1p1a9d86eb429a6f68c29d1980891f49786@eucas1p1.samsung.com>
2022-05-06  8:11     ` [PATCH v3 08/11] btrfs: zoned: relax the alignment constraint for " Pankaj Raghav
2022-05-06  8:11       ` [dm-devel] " Pankaj Raghav
     [not found]   ` <CGME20220506081115eucas1p2e7bed137c74be42a702732027581330e@eucas1p2.samsung.com>
2022-05-06  8:11     ` [PATCH v3 09/11] zonefs: allow non power of 2 " Pankaj Raghav
2022-05-06  8:11       ` [dm-devel] " Pankaj Raghav
     [not found]   ` <CGME20220506081116eucas1p2cce67bbf30f4c9c4e6854965be41b098@eucas1p2.samsung.com>
2022-05-06  8:11     ` [PATCH v3 10/11] null_blk: " Pankaj Raghav
2022-05-06  8:11       ` [dm-devel] " Pankaj Raghav
2022-05-06 15:47       ` Damien Le Moal
2022-05-06 15:47         ` [dm-devel] " Damien Le Moal
2022-05-09 11:06         ` Pankaj Raghav
2022-05-09 11:06           ` [dm-devel] " Pankaj Raghav
2022-05-09 11:31           ` Damien Le Moal
2022-05-09 11:31             ` [dm-devel] " Damien Le Moal
2022-05-09 11:56             ` Pankaj Raghav
2022-05-09 11:56               ` [dm-devel] " Pankaj Raghav
2022-05-12 17:22               ` Bart Van Assche
2022-05-12 17:22                 ` [dm-devel] " Bart Van Assche
     [not found]   ` <CGME20220506081118eucas1p17f3c29cc36d748c3b5a3246f069f434a@eucas1p1.samsung.com>
2022-05-06  8:11     ` [PATCH v3 11/11] dm-zoned: ensure only power of 2 zone sizes are allowed Pankaj Raghav
2022-05-06  8:11       ` [dm-devel] " Pankaj Raghav
2022-05-06 15:41       ` Damien Le Moal
2022-05-06 15:41         ` [dm-devel] " Damien Le Moal
2022-05-09 11:03         ` Pankaj Raghav
2022-05-09 11:03           ` [dm-devel] " Pankaj Raghav
2022-05-09 16:05           ` Mike Snitzer
2022-05-09 16:05             ` [dm-devel] " Mike Snitzer
2022-05-09 18:54       ` David Sterba
2022-05-09 18:54         ` [dm-devel] " David Sterba
2022-05-11 14:39         ` Pankaj Raghav
2022-05-11 14:39           ` [dm-devel] " Pankaj Raghav
2022-05-11 16:00           ` David Sterba
2022-05-11 16:00             ` [dm-devel] " David Sterba
2022-05-12  8:27             ` Pankaj Raghav
2022-05-12  8:27               ` [dm-devel] " Pankaj Raghav
2022-05-06 10:00   ` [PATCH v3 00/11] support non power of 2 zoned devices David Sterba
2022-05-06 10:00     ` [dm-devel] " David Sterba
2022-05-09 11:02     ` Pankaj Raghav
2022-05-09 11:02       ` [dm-devel] " Pankaj Raghav

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220506081105.29134-3-p.raghav@samsung.com \
    --to=p.raghav@samsung.com \
    --cc=agk@redhat.com \
    --cc=axboe@fb.com \
    --cc=axboe@kernel.dk \
    --cc=bvanassche@acm.org \
    --cc=clm@fb.com \
    --cc=damien.lemoal@opensource.wdc.com \
    --cc=dm-devel@redhat.com \
    --cc=dsterba@suse.com \
    --cc=gost.dev@samsung.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=jaegeuk@kernel.org \
    --cc=jiangbo.365@bytedance.com \
    --cc=jonathan.derrick@linux.dev \
    --cc=josef@toxicpanda.com \
    --cc=jth@kernel.org \
    --cc=kbusch@kernel.org \
    --cc=kch@nvidia.com \
    --cc=linux-block@vger.kernel.org \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-nvme@lists.infradead.org \
    --cc=matias.bjorling@wdc.com \
    --cc=mcgrof@kernel.org \
    --cc=naohiro.aota@wdc.com \
    --cc=sagi@grimberg.me \
    --cc=snitzer@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.