From: Naohiro Aota <naota@elisp.net>
To: David Sterba <dsterba@suse.com>, linux-btrfs@vger.kernel.org
Cc: Chris Mason <clm@fb.com>, Josef Bacik <jbacik@fb.com>,
linux-kernel@vger.kernel.org, Hannes Reinecke <hare@suse.com>,
Damien Le Moal <damien.lemoal@wdc.com>,
Bart Van Assche <bart.vanassche@wdc.com>,
Matias Bjorling <mb@lightnvm.io>, Naohiro Aota <naota@elisp.net>
Subject: [RFC PATCH 08/17] btrfs: align extent allocation to zone boundary
Date: Fri, 10 Aug 2018 03:04:41 +0900 [thread overview]
Message-ID: <20180809180450.5091-9-naota@elisp.net> (raw)
In-Reply-To: <20180809180450.5091-1-naota@elisp.net>
In HMZONED mode, align the device extents to zone boundaries so that write
I/Os can begin at the start of a zone, as mandated on host-managed zoned
block devices. Also, check that a region allocation is always over empty
zones.
Signed-off-by: Naohiro Aota <naota@elisp.net>
---
fs/btrfs/extent-tree.c | 3 ++
fs/btrfs/volumes.c | 69 ++++++++++++++++++++++++++++++++++++++----
2 files changed, 66 insertions(+), 6 deletions(-)
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index f77226d8020a..fc3daf0e5b92 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9527,6 +9527,9 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr)
min_free = div64_u64(min_free, dev_min);
}
+ /* We cannot allocate size less than zone_size anyway */
+ min_free = max_t(u64, min_free, fs_info->zone_size);
+
/* We need to do this so that we can look at pending chunks */
trans = btrfs_join_transaction(root);
if (IS_ERR(trans)) {
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index ba7ebb80de4d..ada13120c2cd 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1521,6 +1521,31 @@ static int contains_pending_extent(struct btrfs_transaction *transaction,
return ret;
}
+/*
+ * Round @pos up to the next multiple of @device's zone size.
+ *
+ * A device with zone_size == 0 needs no alignment; @pos is returned
+ * unchanged in that case.
+ */
+static u64 dev_zone_align(struct btrfs_device *device, u64 pos)
+{
+	if (!device->zone_size)
+		return pos;
+
+	return ALIGN(pos, device->zone_size);
+}
+
+/*
+ * Check that the zones backing the range [@pos, @pos + @num_bytes) on
+ * @device are all empty, probing one zone per zone_size step starting at
+ * @pos with btrfs_dev_is_empty_zone().
+ *
+ * Returns 1 when the device has no zone size set (zone_size == 0) or when
+ * every probed zone is empty, and 0 as soon as a non-empty zone is found.
+ *
+ * Both @pos and @num_bytes are expected to be zone aligned. A violation
+ * triggers WARN_ON(), but the walk stays bounded by the end of the range.
+ */
+static int is_empty_zone_region(struct btrfs_device *device,
+				 u64 pos, u64 num_bytes)
+{
+	u64 end = pos + num_bytes;
+
+	if (device->zone_size == 0)
+		return 1;
+
+	WARN_ON(!IS_ALIGNED(pos, device->zone_size));
+	WARN_ON(!IS_ALIGNED(num_bytes, device->zone_size));
+
+	/*
+	 * Walk up to a fixed end offset rather than counting num_bytes down:
+	 * with an unaligned num_bytes the unsigned subtraction would wrap
+	 * around instead of reaching 0, and the loop would keep probing
+	 * zones far beyond the requested range.
+	 */
+	while (pos < end) {
+		if (!btrfs_dev_is_empty_zone(device, pos))
+			return 0;
+		pos += device->zone_size;
+	}
+
+	return 1;
+}
/*
* find_free_dev_extent_start - find free space in the specified device
@@ -1564,9 +1589,14 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
/*
* We don't want to overwrite the superblock on the drive nor any area
* used by the boot loader (grub for example), so we make sure to start
- * at an offset of at least 1MB.
+ * at an offset of at least 1MB on a regular disk. For a zoned block
+ * device, skip the first zone of the device entirely.
*/
- search_start = max_t(u64, search_start, SZ_1M);
+ if (device->zone_size)
+ search_start = max_t(u64, dev_zone_align(device, search_start),
+ device->zone_size);
+ else
+ search_start = max_t(u64, search_start, SZ_1M);
path = btrfs_alloc_path();
if (!path)
@@ -1632,6 +1662,8 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
if (contains_pending_extent(transaction, device,
&search_start,
hole_size)) {
+ search_start = dev_zone_align(device,
+ search_start);
if (key.offset >= search_start) {
hole_size = key.offset - search_start;
} else {
@@ -1640,6 +1672,14 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
}
}
+ if (!is_empty_zone_region(device, search_start,
+ num_bytes)) {
+ search_start = dev_zone_align(device,
+ search_start+1);
+ btrfs_release_path(path);
+ goto again;
+ }
+
if (hole_size > max_hole_size) {
max_hole_start = search_start;
max_hole_size = hole_size;
@@ -1664,7 +1704,7 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
extent_end = key.offset + btrfs_dev_extent_length(l,
dev_extent);
if (extent_end > search_start)
- search_start = extent_end;
+ search_start = dev_zone_align(device, extent_end);
next:
path->slots[0]++;
cond_resched();
@@ -1680,6 +1720,14 @@ int find_free_dev_extent_start(struct btrfs_transaction *transaction,
if (contains_pending_extent(transaction, device, &search_start,
hole_size)) {
+ search_start = dev_zone_align(device,
+ search_start);
+ btrfs_release_path(path);
+ goto again;
+ }
+
+ if (!is_empty_zone_region(device, search_start, num_bytes)) {
+ search_start = dev_zone_align(device, search_start+1);
btrfs_release_path(path);
goto again;
}
@@ -4832,6 +4880,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
int i;
int j;
int index;
+ int hmzoned = btrfs_fs_incompat(info, HMZONED);
BUG_ON(!alloc_profile_is_valid(type, 0));
@@ -4851,13 +4900,18 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
ncopies = btrfs_raid_array[index].ncopies;
if (type & BTRFS_BLOCK_GROUP_DATA) {
- max_stripe_size = SZ_1G;
+ if (hmzoned)
+ max_stripe_size = info->zone_size;
+ else
+ max_stripe_size = SZ_1G;
max_chunk_size = BTRFS_MAX_DATA_CHUNK_SIZE;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_METADATA) {
/* for larger filesystems, use larger metadata chunks */
- if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
+ if (hmzoned)
+ max_stripe_size = info->zone_size;
+ else if (fs_devices->total_rw_bytes > 50ULL * SZ_1G)
max_stripe_size = SZ_1G;
else
max_stripe_size = SZ_256M;
@@ -4865,7 +4919,10 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
if (!devs_max)
devs_max = BTRFS_MAX_DEVS(info);
} else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
- max_stripe_size = SZ_32M;
+ if (hmzoned)
+ max_stripe_size = info->zone_size;
+ else
+ max_stripe_size = SZ_32M;
max_chunk_size = 2 * max_stripe_size;
if (!devs_max)
devs_max = BTRFS_MAX_DEVS_SYS_CHUNK;
--
2.18.0
next prev parent reply other threads:[~2018-08-09 18:06 UTC|newest]
Thread overview: 47+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-08-09 18:04 [RFC PATCH 00/17] btrfs zoned block device support Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 01/17] btrfs: introduce HMZONED feature flag Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 02/17] btrfs: Get zone information of zoned block devices Naohiro Aota
2018-08-10 7:41 ` Nikolay Borisov
2018-08-09 18:04 ` [RFC PATCH 03/17] btrfs: Check and enable HMZONED mode Naohiro Aota
2018-08-10 12:25 ` Hannes Reinecke
2018-08-10 13:15 ` Naohiro Aota
2018-08-10 13:41 ` Hannes Reinecke
2018-08-09 18:04 ` [RFC PATCH 04/17] btrfs: limit super block locations in " Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 05/17] btrfs: disable fallocate " Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 06/17] btrfs: disable direct IO " Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 07/17] btrfs: disable device replace " Naohiro Aota
2018-08-09 18:04 ` Naohiro Aota [this message]
2018-08-09 18:04 ` [RFC PATCH 09/17] btrfs: do sequential allocation on HMZONED drives Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 10/17] btrfs: split btrfs_map_bio() Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 11/17] btrfs: introduce submit buffer Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 12/17] btrfs: expire submit buffer on timeout Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 13/17] btrfs: avoid sync IO prioritization on checksum in HMZONED mode Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 14/17] btrfs: redirty released extent buffers in sequential BGs Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 15/17] btrfs: reset zones of unused block groups Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 16/17] btrfs: wait existing extents before truncating Naohiro Aota
2018-08-09 18:04 ` [RFC PATCH 17/17] btrfs: enable to mount HMZONED incompat flag Naohiro Aota
2018-08-09 18:10 ` [RFC PATCH 01/12] btrfs-progs: build: Check zoned block device support Naohiro Aota
2018-08-09 18:10 ` [RFC PATCH 02/12] btrfs-progs: utils: Introduce queue_param Naohiro Aota
2018-08-09 18:10 ` [RFC PATCH 03/12] btrfs-progs: add new HMZONED feature flag Naohiro Aota
2018-08-09 18:10 ` [RFC PATCH 04/12] btrfs-progs: Introduce zone block device helper functions Naohiro Aota
2018-08-09 18:10 ` [RFC PATCH 05/12] btrfs-progs: load and check zone information Naohiro Aota
2018-08-09 18:10 ` [RFC PATCH 06/12] btrfs-progs: avoid writing super block to sequential zones Naohiro Aota
2018-08-09 18:11 ` [RFC PATCH 07/12] btrfs-progs: support discarding zoned device Naohiro Aota
2018-08-09 18:11 ` [RFC PATCH 08/12] btrfs-progs: volume: align chunk allocation to zones Naohiro Aota
2018-08-09 18:11 ` [RFC PATCH 09/12] btrfs-progs: mkfs: Zoned block device support Naohiro Aota
2018-08-09 18:11 ` [RFC PATCH 10/12] btrfs-progs: device-add: support HMZONED device Naohiro Aota
2018-08-09 18:11 ` [RFC PATCH 11/12] btrfs-progs: replace: disable in " Naohiro Aota
2018-08-09 18:11 ` [RFC PATCH 12/12] btrfs-progs: do sequential allocation Naohiro Aota
2018-08-10 7:04 ` [RFC PATCH 00/17] btrfs zoned block device support Hannes Reinecke
2018-08-10 14:24 ` Naohiro Aota
2018-08-10 7:26 ` Hannes Reinecke
2018-08-10 7:28 ` Qu Wenruo
2018-08-16 9:05 ` Naohiro Aota
2018-08-10 7:53 ` Nikolay Borisov
2018-08-10 7:55 ` Nikolay Borisov
2018-08-13 18:42 ` David Sterba
2018-08-13 19:20 ` Hannes Reinecke
2018-08-13 19:29 ` Austin S. Hemmelgarn
2018-08-14 7:41 ` Hannes Reinecke
2018-08-15 11:25 ` Austin S. Hemmelgarn
2018-08-28 10:33 ` Naohiro Aota
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180809180450.5091-9-naota@elisp.net \
--to=naota@elisp.net \
--cc=bart.vanassche@wdc.com \
--cc=clm@fb.com \
--cc=damien.lemoal@wdc.com \
--cc=dsterba@suse.com \
--cc=hare@suse.com \
--cc=jbacik@fb.com \
--cc=linux-btrfs@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mb@lightnvm.io \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).