Linux-BTRFS Archive on lore.kernel.org
 help / color / Atom feed
From: Naohiro Aota <naohiro.aota@wdc.com>
To: linux-btrfs@vger.kernel.org, David Sterba <dsterba@suse.com>
Cc: "Chris Mason" <clm@fb.com>, "Josef Bacik" <josef@toxicpanda.com>,
	"Qu Wenruo" <wqu@suse.com>, "Nikolay Borisov" <nborisov@suse.com>,
	linux-kernel@vger.kernel.org, "Hannes Reinecke" <hare@suse.com>,
	linux-fsdevel@vger.kernel.org,
	"Damien Le Moal" <damien.lemoal@wdc.com>,
	"Matias Bjørling" <mb@lightnvm.io>,
	"Johannes Thumshirn" <jthumshirn@suse.de>,
	"Bart Van Assche" <bvanassche@acm.org>,
	"Naohiro Aota" <naohiro.aota@wdc.com>
Subject: [PATCH 08/12] btrfs-progs: volume: align chunk allocation to zones
Date: Fri,  7 Jun 2019 22:17:47 +0900
Message-ID: <20190607131751.5359-8-naohiro.aota@wdc.com> (raw)
In-Reply-To: <20190607131751.5359-1-naohiro.aota@wdc.com>

To facilitate support for zoned block devices in the extent buffer
allocation, a zoned block device chunk is always aligned to a zone of the
device. With this, the zone write pointer location simply becomes a hint to
allocate new buffers.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 volumes.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 75 insertions(+), 4 deletions(-)

diff --git a/volumes.c b/volumes.c
index f6d1b1e9dc7f..64b42643390b 100644
--- a/volumes.c
+++ b/volumes.c
@@ -399,6 +399,34 @@ int btrfs_scan_one_device(int fd, const char *path,
 	return ret;
 }
 
+/* zone size is guaranteed to be a power of 2 */
+static u64 btrfs_zone_align(struct btrfs_zone_info *zinfo, u64 val)
+{
+	if (zinfo && zinfo->zone_size)
+		return (val + zinfo->zone_size - 1) & ~(zinfo->zone_size - 1);
+	return val;
+}
+
+static bool check_dev_zone(struct btrfs_zone_info *zinfo, u64 physical,
+			   u64 num_bytes)
+{
+	u64 zone_size = zinfo->zone_size;
+	int zone_is_random;
+
+	WARN_ON(!IS_ALIGNED(num_bytes, zone_size));
+	zone_is_random = zone_is_random_write(zinfo, physical);
+
+	while (num_bytes) {
+		if (zone_is_random != zone_is_random_write(zinfo, physical))
+			return false;
+
+		physical += zone_size;
+		num_bytes -= zone_size;
+	}
+
+	return true;
+}
+
 /*
  * find_free_dev_extent_start - find free space in the specified device
  * @device:	  the device which we search the free space in
@@ -428,6 +456,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
 	struct btrfs_root *root = device->dev_root;
 	struct btrfs_dev_extent *dev_extent;
 	struct btrfs_path *path;
+	struct btrfs_zone_info *zinfo = &device->zinfo;
 	u64 hole_size;
 	u64 max_hole_start;
 	u64 max_hole_size;
@@ -445,6 +474,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
 	 */
 	min_search_start = max(root->fs_info->alloc_start, (u64)SZ_1M);
 	search_start = max(search_start, min_search_start);
+	search_start = btrfs_zone_align(zinfo, search_start);
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -497,6 +527,18 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
 			goto next;
 
 		if (key.offset > search_start) {
+			if (zinfo && zinfo->zone_size) {
+				while (key.offset > search_start) {
+					hole_size = key.offset - search_start;
+					if (hole_size < num_bytes)
+						break;
+					if (check_dev_zone(zinfo, search_start,
+							   num_bytes))
+						break;
+					search_start += zinfo->zone_size;
+				}
+			}
+
 			hole_size = key.offset - search_start;
 
 			/*
@@ -527,7 +569,8 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
 		extent_end = key.offset + btrfs_dev_extent_length(l,
 								  dev_extent);
 		if (extent_end > search_start)
-			search_start = extent_end;
+			search_start =  btrfs_zone_align(&device->zinfo,
+							 extent_end);
 next:
 		path->slots[0]++;
 		cond_resched();
@@ -539,6 +582,18 @@ next:
 	 * search_end may be smaller than search_start.
 	 */
 	if (search_end > search_start) {
+		if (zinfo && zinfo->zone_size) {
+			while (search_end > search_start) {
+				hole_size = search_end - search_start;
+				if (hole_size < num_bytes)
+					break;
+				if (check_dev_zone(zinfo, search_start,
+						   num_bytes))
+					break;
+				search_start += zinfo->zone_size;
+			}
+		}
+
 		hole_size = search_end - search_start;
 
 		if (hole_size > max_hole_size) {
@@ -582,6 +637,9 @@ int btrfs_insert_dev_extent(struct btrfs_trans_handle *trans,
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
 
+	/* Align to zone for a zoned block device */
+	start = btrfs_zone_align(&device->zinfo, start);
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -1065,9 +1123,15 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 				    btrfs_super_stripesize(info->super_copy));
 	}
 
-	/* we don't want a chunk larger than 10% of the FS */
-	percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1);
-	max_chunk_size = min(percent_max, max_chunk_size);
+	if (info->fs_devices->hmzoned) {
+		/* Zoned mode uses zone aligned chunks */
+		calc_size = info->fs_devices->zone_size;
+		max_chunk_size = calc_size * num_stripes;
+	} else {
+		/* we don't want a chunk larger than 10% of the FS */
+		percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1);
+		max_chunk_size = min(percent_max, max_chunk_size);
+	}
 
 again:
 	if (chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes) >
@@ -1147,7 +1211,9 @@ again:
 	*num_bytes = chunk_bytes_by_type(type, calc_size,
 					 num_stripes, sub_stripes);
 	index = 0;
+	dev_offset = 0;
 	while(index < num_stripes) {
+		size_t zone_size = device->zinfo.zone_size;
 		struct btrfs_stripe *stripe;
 		BUG_ON(list_empty(&private_devs));
 		cur = private_devs.next;
@@ -1158,11 +1224,16 @@ again:
 		    (index == num_stripes - 1))
 			list_move_tail(&device->dev_list, dev_list);
 
+		if (device->zinfo.zone_size)
+			calc_size = device->zinfo.zone_size;
+
 		ret = btrfs_alloc_dev_extent(trans, device, key.offset,
 			     calc_size, &dev_offset);
 		if (ret < 0)
 			goto out_chunk_map;
 
+		WARN_ON(zone_size && !IS_ALIGNED(dev_offset, zone_size));
+
 		device->bytes_used += calc_size;
 		ret = btrfs_update_device(trans, device);
 		if (ret < 0)
-- 
2.21.0


  parent reply index

Thread overview: 79+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-07 13:10 [PATCH v2 00/19] btrfs zoned block device support Naohiro Aota
2019-06-07 13:10 ` [PATCH 01/19] btrfs: introduce HMZONED feature flag Naohiro Aota
2019-06-07 13:10 ` [PATCH 02/19] btrfs: Get zone information of zoned block devices Naohiro Aota
2019-06-13 13:58   ` Josef Bacik
2019-06-18  6:04     ` Naohiro Aota
2019-06-13 13:58   ` Josef Bacik
2019-06-17 18:57   ` David Sterba
2019-06-18  6:42     ` Naohiro Aota
2019-06-27 15:11       ` David Sterba
2019-06-07 13:10 ` [PATCH 03/19] btrfs: Check and enable HMZONED mode Naohiro Aota
2019-06-13 13:57   ` Josef Bacik
2019-06-18  6:43     ` Naohiro Aota
2019-06-07 13:10 ` [PATCH 04/19] btrfs: disable fallocate in " Naohiro Aota
2019-06-07 13:10 ` [PATCH 05/19] btrfs: disable direct IO " Naohiro Aota
2019-06-13 14:00   ` Josef Bacik
2019-06-18  8:17     ` Naohiro Aota
2019-06-07 13:10 ` [PATCH 06/19] btrfs: align dev extent allocation to zone boundary Naohiro Aota
2019-06-07 13:10 ` [PATCH 07/19] btrfs: do sequential extent allocation in HMZONED mode Naohiro Aota
2019-06-13 14:07   ` Josef Bacik
2019-06-18  8:28     ` Naohiro Aota
2019-06-18 13:37       ` Josef Bacik
2019-06-17 22:30   ` David Sterba
2019-06-18  8:49     ` Naohiro Aota
2019-06-27 15:28       ` David Sterba
2019-06-07 13:10 ` [PATCH 08/19] btrfs: make unmirroed BGs readonly only if we have at least one writable BG Naohiro Aota
2019-06-13 14:09   ` Josef Bacik
2019-06-18  7:42     ` Naohiro Aota
2019-06-18 13:35       ` Josef Bacik
2019-06-07 13:10 ` [PATCH 09/19] btrfs: limit super block locations in HMZONED mode Naohiro Aota
2019-06-13 14:12   ` Josef Bacik
2019-06-18  8:51     ` Naohiro Aota
2019-06-17 22:53   ` David Sterba
2019-06-18  9:01     ` Naohiro Aota
2019-06-27 15:35       ` David Sterba
2019-06-28  3:55   ` Anand Jain
2019-06-28  6:39     ` Naohiro Aota
2019-06-28  6:52       ` Anand Jain
2019-06-07 13:10 ` [PATCH 10/19] btrfs: rename btrfs_map_bio() Naohiro Aota
2019-06-07 13:10 ` [PATCH 11/19] btrfs: introduce submit buffer Naohiro Aota
2019-06-13 14:14   ` Josef Bacik
2019-06-17  3:16     ` Damien Le Moal
2019-06-18  0:00       ` David Sterba
2019-06-18  4:04         ` Damien Le Moal
2019-06-18 13:33       ` Josef Bacik
2019-06-19 10:32         ` Damien Le Moal
2019-06-07 13:10 ` [PATCH 12/19] btrfs: expire submit buffer on timeout Naohiro Aota
2019-06-13 14:15   ` Josef Bacik
2019-06-17  3:19     ` Damien Le Moal
2019-06-07 13:10 ` [PATCH 13/19] btrfs: avoid sync IO prioritization on checksum in HMZONED mode Naohiro Aota
2019-06-13 14:17   ` Josef Bacik
2019-06-07 13:10 ` [PATCH 14/19] btrfs: redirty released extent buffers in sequential BGs Naohiro Aota
2019-06-13 14:24   ` Josef Bacik
2019-06-18  9:09     ` Naohiro Aota
2019-06-07 13:10 ` [PATCH 15/19] btrfs: reset zones of unused block groups Naohiro Aota
2019-06-07 13:10 ` [PATCH 16/19] btrfs: wait existing extents before truncating Naohiro Aota
2019-06-13 14:25   ` Josef Bacik
2019-06-07 13:10 ` [PATCH 17/19] btrfs: shrink delayed allocation size in HMZONED mode Naohiro Aota
2019-06-13 14:27   ` Josef Bacik
2019-06-07 13:10 ` [PATCH 18/19] btrfs: support dev-replace " Naohiro Aota
2019-06-13 14:33   ` Josef Bacik
2019-06-18  9:14     ` Naohiro Aota
2019-06-07 13:10 ` [PATCH 19/19] btrfs: enable to mount HMZONED incompat flag Naohiro Aota
2019-06-07 13:17 ` [PATCH 01/12] btrfs-progs: build: Check zoned block device support Naohiro Aota
2019-06-07 13:17   ` [PATCH 02/12] btrfs-progs: utils: Introduce queue_param Naohiro Aota
2019-06-07 13:17   ` [PATCH 03/12] btrfs-progs: add new HMZONED feature flag Naohiro Aota
2019-06-07 13:17   ` [PATCH 04/12] btrfs-progs: Introduce zone block device helper functions Naohiro Aota
2019-06-07 13:17   ` [PATCH 05/12] btrfs-progs: load and check zone information Naohiro Aota
2019-06-07 13:17   ` [PATCH 06/12] btrfs-progs: avoid writing super block to sequential zones Naohiro Aota
2019-06-07 13:17   ` [PATCH 07/12] btrfs-progs: support discarding zoned device Naohiro Aota
2019-06-07 13:17   ` Naohiro Aota [this message]
2019-06-07 13:17   ` [PATCH 09/12] btrfs-progs: do sequential allocation Naohiro Aota
2019-06-07 13:17   ` [PATCH 10/12] btrfs-progs: mkfs: Zoned block device support Naohiro Aota
2019-06-07 13:17   ` [PATCH 11/12] btrfs-progs: device-add: support HMZONED device Naohiro Aota
2019-06-07 13:17   ` [PATCH 12/12] btrfs-progs: introduce support for dev-place " Naohiro Aota
2019-06-12 17:51 ` [PATCH v2 00/19] btrfs zoned block device support David Sterba
2019-06-13  4:59   ` Naohiro Aota
2019-06-13 13:46     ` David Sterba
2019-06-14  2:07       ` Naohiro Aota
2019-06-17  2:44       ` Damien Le Moal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190607131751.5359-8-naohiro.aota@wdc.com \
    --to=naohiro.aota@wdc.com \
    --cc=bvanassche@acm.org \
    --cc=clm@fb.com \
    --cc=damien.lemoal@wdc.com \
    --cc=dsterba@suse.com \
    --cc=hare@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=jthumshirn@suse.de \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mb@lightnvm.io \
    --cc=nborisov@suse.com \
    --cc=wqu@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Linux-BTRFS Archive on lore.kernel.org

Archives are clonable:
	git clone --mirror https://lore.kernel.org/linux-btrfs/0 linux-btrfs/git/0.git

	# If you have public-inbox 1.1+ installed, you may
	# initialize and index your mirror using the following commands:
	public-inbox-init -V2 linux-btrfs linux-btrfs/ https://lore.kernel.org/linux-btrfs \
		linux-btrfs@vger.kernel.org linux-btrfs@archiver.kernel.org
	public-inbox-index linux-btrfs


Newsgroup available over NNTP:
	nntp://nntp.lore.kernel.org/org.kernel.vger.linux-btrfs


AGPL code for this site: git clone https://public-inbox.org/ public-inbox