Linux-BTRFS Archive on lore.kernel.org
 help / color / Atom feed
From: Naohiro Aota <naohiro.aota@wdc.com>
To: linux-btrfs@vger.kernel.org, David Sterba <dsterba@suse.com>
Cc: "Chris Mason" <clm@fb.com>, "Josef Bacik" <josef@toxicpanda.com>,
	"Qu Wenruo" <wqu@suse.com>, "Nikolay Borisov" <nborisov@suse.com>,
	linux-kernel@vger.kernel.org, "Hannes Reinecke" <hare@suse.com>,
	linux-fsdevel@vger.kernel.org,
	"Damien Le Moal" <damien.lemoal@wdc.com>,
	"Matias Bjørling" <mb@lightnvm.io>,
	"Johannes Thumshirn" <jthumshirn@suse.de>,
	"Bart Van Assche" <bvanassche@acm.org>,
	"Naohiro Aota" <naohiro.aota@wdc.com>
Subject: [PATCH 07/12] btrfs-progs: support discarding zoned device
Date: Fri,  7 Jun 2019 22:17:46 +0900
Message-ID: <20190607131751.5359-7-naohiro.aota@wdc.com> (raw)
In-Reply-To: <20190607131751.5359-1-naohiro.aota@wdc.com>

All zones of zoned block devices should be reset before writing. Support
this by considering zone reset as a special case of block discard and block
zeroing. Of note is that only zones accepting random writes can be zeroed.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 utils.c | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 88 insertions(+), 6 deletions(-)

diff --git a/utils.c b/utils.c
index a26fe7a5743c..c375b32953f7 100644
--- a/utils.c
+++ b/utils.c
@@ -123,6 +123,37 @@ static int discard_range(int fd, u64 start, u64 len)
 	return 0;
 }
 
+/*
+ * Discard blocks in the zones of a zoned block device.
+ * Process this with zone size granularity so that blocks in
+ * conventional zones are discarded using discard_range and
+ * blocks in sequential zones are discarded though a zone reset.
+ */
+static int discard_zones(int fd, struct btrfs_zone_info *zinfo)
+{
+#ifdef BTRFS_ZONED
+	unsigned int i;
+
+	/* Zone size granularity */
+	for (i = 0; i < zinfo->nr_zones; i++) {
+		if (zinfo->zones[i].type == BLK_ZONE_TYPE_CONVENTIONAL) {
+			discard_range(fd, zinfo->zones[i].start << 9,
+				      zinfo->zone_size);
+		} else if (zinfo->zones[i].cond != BLK_ZONE_COND_EMPTY) {
+			struct blk_zone_range range = {
+				zinfo->zones[i].start,
+				zinfo->zone_size >> 9 };
+			if (ioctl(fd, BLKRESETZONE, &range) < 0)
+				return errno;
+		}
+	}
+
+	return 0;
+#else
+	return -EIO;
+#endif
+}
+
 /*
  * Discard blocks in the given range in 1G chunks, the process is interruptible
  */
@@ -205,8 +236,38 @@ static int zero_blocks(int fd, off_t start, size_t len)
 
 #define ZERO_DEV_BYTES SZ_2M
 
+static int zero_zone_blocks(int fd, struct btrfs_zone_info *zinfo,
+			    off_t start, size_t len)
+{
+	size_t zone_len = zinfo->zone_size;
+	off_t ofst = start;
+	size_t count;
+	int ret;
+
+	/* Make sure that zero_blocks does not write sequential zones */
+	while (len > 0) {
+
+		/* Limit zero_blocks to a single zone */
+		count = min_t(size_t, len, zone_len);
+		if (count > zone_len - (ofst & (zone_len - 1)))
+			count = zone_len - (ofst & (zone_len - 1));
+
+		if (zone_is_random_write(zinfo, ofst)) {
+			ret = zero_blocks(fd, ofst, count);
+			if (ret != 0)
+				return ret;
+		}
+
+		len -= count;
+		ofst += count;
+	}
+
+	return 0;
+}
+
 /* don't write outside the device by clamping the region to the device size */
-static int zero_dev_clamped(int fd, off_t start, ssize_t len, u64 dev_size)
+static int zero_dev_clamped(int fd, struct btrfs_zone_info *zinfo,
+			    off_t start, ssize_t len, u64 dev_size)
 {
 	off_t end = max(start, start + len);
 
@@ -219,6 +280,9 @@ static int zero_dev_clamped(int fd, off_t start, ssize_t len, u64 dev_size)
 	start = min_t(u64, start, dev_size);
 	end = min_t(u64, end, dev_size);
 
+	if (zinfo->model != ZONED_NONE)
+		return zero_zone_blocks(fd, zinfo, start, end - start);
+
 	return zero_blocks(fd, start, end - start);
 }
 
@@ -566,6 +630,7 @@ int btrfs_get_zone_info(int fd, const char *file, int hmzoned,
 int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
 		u64 max_block_count, unsigned opflags)
 {
+	struct btrfs_zone_info zinfo;
 	u64 block_count;
 	struct stat st;
 	int i, ret;
@@ -584,13 +649,30 @@ int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
 	if (max_block_count)
 		block_count = min(block_count, max_block_count);
 
+	ret = btrfs_get_zone_info(fd, file, opflags & PREP_DEVICE_HMZONED,
+				  &zinfo);
+	if (ret < 0)
+		return 1;
+
 	if (opflags & PREP_DEVICE_DISCARD) {
 		/*
 		 * We intentionally ignore errors from the discard ioctl.  It
 		 * is not necessary for the mkfs functionality but just an
-		 * optimization.
+		 * optimization. However, we cannot ignore zone discard (reset)
+		 * errors for a zoned block device as this could result in the
+		 * inability to write to non-empty sequential zones of the
+		 * device.
 		 */
-		if (discard_range(fd, 0, 0) == 0) {
+		if (zinfo.model != ZONED_NONE) {
+			printf("Resetting device zones %s (%u zones) ...\n",
+				file, zinfo.nr_zones);
+			if (discard_zones(fd, &zinfo)) {
+				fprintf(stderr,
+					"ERROR: failed to reset device '%s' zones\n",
+					file);
+				return 1;
+			}
+		} else if (discard_range(fd, 0, 0) == 0) {
 			if (opflags & PREP_DEVICE_VERBOSE)
 				printf("Performing full device TRIM %s (%s) ...\n",
 						file, pretty_size(block_count));
@@ -598,12 +680,12 @@ int btrfs_prepare_device(int fd, const char *file, u64 *block_count_ret,
 		}
 	}
 
-	ret = zero_dev_clamped(fd, 0, ZERO_DEV_BYTES, block_count);
+	ret = zero_dev_clamped(fd, &zinfo, 0, ZERO_DEV_BYTES, block_count);
 	for (i = 0 ; !ret && i < BTRFS_SUPER_MIRROR_MAX; i++)
-		ret = zero_dev_clamped(fd, btrfs_sb_offset(i),
+		ret = zero_dev_clamped(fd, &zinfo, btrfs_sb_offset(i),
 				       BTRFS_SUPER_INFO_SIZE, block_count);
 	if (!ret && (opflags & PREP_DEVICE_ZERO_END))
-		ret = zero_dev_clamped(fd, block_count - ZERO_DEV_BYTES,
+		ret = zero_dev_clamped(fd, &zinfo, block_count - ZERO_DEV_BYTES,
 				       ZERO_DEV_BYTES, block_count);
 
 	if (ret < 0) {
-- 
2.21.0


  parent reply index

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-07 13:10 [PATCH v2 00/19] btrfs zoned block device support Naohiro Aota
2019-06-07 13:10 ` [PATCH 01/19] btrfs: introduce HMZONED feature flag Naohiro Aota
2019-06-07 13:10 ` [PATCH 02/19] btrfs: Get zone information of zoned block devices Naohiro Aota
2019-06-13 13:58   ` Josef Bacik
2019-06-18  6:04     ` Naohiro Aota
2019-06-13 13:58   ` Josef Bacik
2019-06-17 18:57   ` David Sterba
2019-06-18  6:42     ` Naohiro Aota
2019-06-27 15:11       ` David Sterba
2019-06-07 13:10 ` [PATCH 03/19] btrfs: Check and enable HMZONED mode Naohiro Aota
2019-06-13 13:57   ` Josef Bacik
2019-06-18  6:43     ` Naohiro Aota
2019-06-07 13:10 ` [PATCH 04/19] btrfs: disable fallocate in " Naohiro Aota
2019-06-07 13:10 ` [PATCH 05/19] btrfs: disable direct IO " Naohiro Aota
2019-06-13 14:00   ` Josef Bacik
2019-06-18  8:17     ` Naohiro Aota
2019-06-07 13:10 ` [PATCH 06/19] btrfs: align dev extent allocation to zone boundary Naohiro Aota
2019-06-07 13:10 ` [PATCH 07/19] btrfs: do sequential extent allocation in HMZONED mode Naohiro Aota
2019-06-13 14:07   ` Josef Bacik
2019-06-18  8:28     ` Naohiro Aota
2019-06-18 13:37       ` Josef Bacik
2019-06-17 22:30   ` David Sterba
2019-06-18  8:49     ` Naohiro Aota
2019-06-27 15:28       ` David Sterba
2019-06-07 13:10 ` [PATCH 08/19] btrfs: make unmirroed BGs readonly only if we have at least one writable BG Naohiro Aota
2019-06-13 14:09   ` Josef Bacik
2019-06-18  7:42     ` Naohiro Aota
2019-06-18 13:35       ` Josef Bacik
2019-06-07 13:10 ` [PATCH 09/19] btrfs: limit super block locations in HMZONED mode Naohiro Aota
2019-06-13 14:12   ` Josef Bacik
2019-06-18  8:51     ` Naohiro Aota
2019-06-17 22:53   ` David Sterba
2019-06-18  9:01     ` Naohiro Aota
2019-06-27 15:35       ` David Sterba
2019-06-28  3:55   ` Anand Jain
2019-06-28  6:39     ` Naohiro Aota
2019-06-28  6:52       ` Anand Jain
2019-06-07 13:10 ` [PATCH 10/19] btrfs: rename btrfs_map_bio() Naohiro Aota
2019-06-07 13:10 ` [PATCH 11/19] btrfs: introduce submit buffer Naohiro Aota
2019-06-13 14:14   ` Josef Bacik
2019-06-17  3:16     ` Damien Le Moal
2019-06-18  0:00       ` David Sterba
2019-06-18  4:04         ` Damien Le Moal
2019-06-18 13:33       ` Josef Bacik
2019-06-19 10:32         ` Damien Le Moal
2019-06-07 13:10 ` [PATCH 12/19] btrfs: expire submit buffer on timeout Naohiro Aota
2019-06-13 14:15   ` Josef Bacik
2019-06-17  3:19     ` Damien Le Moal
2019-06-07 13:10 ` [PATCH 13/19] btrfs: avoid sync IO prioritization on checksum in HMZONED mode Naohiro Aota
2019-06-13 14:17   ` Josef Bacik
2019-06-07 13:10 ` [PATCH 14/19] btrfs: redirty released extent buffers in sequential BGs Naohiro Aota
2019-06-13 14:24   ` Josef Bacik
2019-06-18  9:09     ` Naohiro Aota
2019-06-07 13:10 ` [PATCH 15/19] btrfs: reset zones of unused block groups Naohiro Aota
2019-06-07 13:10 ` [PATCH 16/19] btrfs: wait existing extents before truncating Naohiro Aota
2019-06-13 14:25   ` Josef Bacik
2019-06-07 13:10 ` [PATCH 17/19] btrfs: shrink delayed allocation size in HMZONED mode Naohiro Aota
2019-06-13 14:27   ` Josef Bacik
2019-06-07 13:10 ` [PATCH 18/19] btrfs: support dev-replace " Naohiro Aota
2019-06-13 14:33   ` Josef Bacik
2019-06-18  9:14     ` Naohiro Aota
2019-06-07 13:10 ` [PATCH 19/19] btrfs: enable to mount HMZONED incompat flag Naohiro Aota
2019-06-07 13:17 ` [PATCH 01/12] btrfs-progs: build: Check zoned block device support Naohiro Aota
2019-06-07 13:17   ` [PATCH 02/12] btrfs-progs: utils: Introduce queue_param Naohiro Aota
2019-06-07 13:17   ` [PATCH 03/12] btrfs-progs: add new HMZONED feature flag Naohiro Aota
2019-06-07 13:17   ` [PATCH 04/12] btrfs-progs: Introduce zone block device helper functions Naohiro Aota
2019-06-07 13:17   ` [PATCH 05/12] btrfs-progs: load and check zone information Naohiro Aota
2019-06-07 13:17   ` [PATCH 06/12] btrfs-progs: avoid writing super block to sequential zones Naohiro Aota
2019-06-07 13:17   ` Naohiro Aota [this message]
2019-06-07 13:17   ` [PATCH 08/12] btrfs-progs: volume: align chunk allocation to zones Naohiro Aota
2019-06-07 13:17   ` [PATCH 09/12] btrfs-progs: do sequential allocation Naohiro Aota
2019-06-07 13:17   ` [PATCH 10/12] btrfs-progs: mkfs: Zoned block device support Naohiro Aota
2019-06-07 13:17   ` [PATCH 11/12] btrfs-progs: device-add: support HMZONED device Naohiro Aota
2019-06-07 13:17   ` [PATCH 12/12] btrfs-progs: introduce support for dev-place " Naohiro Aota
2019-06-12 17:51 ` [PATCH v2 00/19] btrfs zoned block device support David Sterba
2019-06-13  4:59   ` Naohiro Aota
2019-06-13 13:46     ` David Sterba
2019-06-14  2:07       ` Naohiro Aota
2019-06-17  2:44       ` Damien Le Moal

Reply instructions:

You may reply publically to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190607131751.5359-7-naohiro.aota@wdc.com \
    --to=naohiro.aota@wdc.com \
    --cc=bvanassche@acm.org \
    --cc=clm@fb.com \
    --cc=damien.lemoal@wdc.com \
    --cc=dsterba@suse.com \
    --cc=hare@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=jthumshirn@suse.de \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mb@lightnvm.io \
    --cc=nborisov@suse.com \
    --cc=wqu@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-BTRFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-btrfs/0 linux-btrfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-btrfs linux-btrfs/ https://lore.kernel.org/linux-btrfs \
		linux-btrfs@vger.kernel.org linux-btrfs@archiver.kernel.org
	public-inbox-index linux-btrfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-btrfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox