linux-btrfs.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Naohiro Aota <naohiro.aota@wdc.com>
To: linux-btrfs@vger.kernel.org, David Sterba <dsterba@suse.com>
Cc: Chris Mason <clm@fb.com>, Josef Bacik <josef@toxicpanda.com>,
	Nikolay Borisov <nborisov@suse.com>,
	Damien Le Moal <damien.lemoal@wdc.com>,
	Matias Bjorling <Matias.Bjorling@wdc.com>,
	Johannes Thumshirn <jthumshirn@suse.de>,
	Hannes Reinecke <hare@suse.com>,
	linux-fsdevel@vger.kernel.org,
	Naohiro Aota <naohiro.aota@wdc.com>
Subject: [PATCH v3 10/15] btrfs-progs: align device extent allocation to zone boundary
Date: Tue, 20 Aug 2019 13:52:53 +0900	[thread overview]
Message-ID: <20190820045258.1571640-11-naohiro.aota@wdc.com> (raw)
In-Reply-To: <20190820045258.1571640-1-naohiro.aota@wdc.com>

In HMZONED mode, align the device extents to zone boundaries so that a zone
reset affects only the device extent and does not change the state of
blocks in the neighbor device extents. Also, check that a region allocation
is always over empty same-type zones and it is not over any locations of
super block copies.

Signed-off-by: Naohiro Aota <naohiro.aota@wdc.com>
---
 common/hmzoned.c | 74 ++++++++++++++++++++++++++++++++++++++++++++++++
 common/hmzoned.h |  2 ++
 kerncompat.h     |  2 ++
 volumes.c        | 72 ++++++++++++++++++++++++++++++++++++++++------
 volumes.h        |  7 +++++
 5 files changed, 149 insertions(+), 8 deletions(-)

diff --git a/common/hmzoned.c b/common/hmzoned.c
index 12eb8f551853..b1d9f5574d35 100644
--- a/common/hmzoned.c
+++ b/common/hmzoned.c
@@ -26,6 +26,8 @@
 #include "common/messages.h"
 #include "mkfs/common.h"
 #include "common/hmzoned.h"
+#include "volumes.h"
+#include "disk-io.h"
 
 #define BTRFS_REPORT_NR_ZONES	8192
 
@@ -272,3 +274,75 @@ int zero_zone_blocks(int fd, struct btrfs_zone_info *zinfo, off_t start,
 
 	return 0;
 }
+
+static inline bool btrfs_dev_is_empty_zone(struct btrfs_device *device, u64 pos)
+{
+	struct btrfs_zone_info *zinfo = &device->zone_info;
+	unsigned int zno;
+
+	if (!zone_is_sequential(zinfo, pos))
+		return true;
+
+	zno = pos / zinfo->zone_size;
+	return zinfo->zones[zno].cond == BLK_ZONE_COND_EMPTY;
+}
+
+
+/*
+ * btrfs_check_allocatable_zones - check if spcecifeid region is
+ *                                 suitable for allocation
+ * @device:	the device to allocate a region
+ * @pos:	the position of the region
+ * @num_bytes:	the size of the region
+ *
+ * In non-ZONED device, anywhere is suitable for allocation. In ZONED
+ * device, check if
+ * 1) the region is not on non-empty zones,
+ * 2) all zones in the region have the same zone type,
+ * 3) it does not contain super block location, if the zones are
+ *    sequential.
+ */
+bool btrfs_check_allocatable_zones(struct btrfs_device *device, u64 pos,
+				   u64 num_bytes)
+{
+	struct btrfs_zone_info *zinfo = &device->zone_info;
+	u64 nzones, begin, end;
+	u64 sb_pos;
+	bool is_sequential;
+	int i;
+
+	if (zinfo->model == ZONED_NONE)
+		return true;
+
+	nzones = num_bytes / zinfo->zone_size;
+	begin = pos / zinfo->zone_size;
+	end = begin + nzones;
+
+	ASSERT(IS_ALIGNED(pos, zinfo->zone_size));
+	ASSERT(IS_ALIGNED(num_bytes, zinfo->zone_size));
+
+	if (end > zinfo->nr_zones)
+		return false;
+
+	is_sequential = btrfs_dev_is_sequential(device, pos);
+	if (is_sequential) {
+		for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+			sb_pos = btrfs_sb_offset(i);
+			if (!(sb_pos + BTRFS_SUPER_INFO_SIZE <= pos ||
+			      pos + end <= sb_pos))
+				return false;
+		}
+	}
+
+	while (num_bytes) {
+		if (!btrfs_dev_is_empty_zone(device, pos))
+			return false;
+		if (is_sequential != btrfs_dev_is_sequential(device, pos))
+			return false;
+
+		pos += zinfo->zone_size;
+		num_bytes -= zinfo->zone_size;
+	}
+
+	return true;
+}
diff --git a/common/hmzoned.h b/common/hmzoned.h
index 75812716ffd9..93759291871f 100644
--- a/common/hmzoned.h
+++ b/common/hmzoned.h
@@ -53,6 +53,8 @@ enum btrfs_zoned_model zoned_model(const char *file);
 size_t zone_size(const char *file);
 int btrfs_get_zone_info(int fd, const char *file, bool hmzoned,
 			struct btrfs_zone_info *zinfo);
+bool btrfs_check_allocatable_zones(struct btrfs_device *device, u64 pos,
+				   u64 num_bytes);
 
 #ifdef BTRFS_ZONED
 bool zone_is_sequential(struct btrfs_zone_info *zinfo, u64 bytenr);
diff --git a/kerncompat.h b/kerncompat.h
index 9fdc58e25d43..b5105df20a3e 100644
--- a/kerncompat.h
+++ b/kerncompat.h
@@ -28,6 +28,7 @@
 #include <assert.h>
 #include <stddef.h>
 #include <linux/types.h>
+#include <linux/kernel.h>
 #include <stdint.h>
 
 #include <features.h>
@@ -345,6 +346,7 @@ static inline void assert_trace(const char *assertion, const char *filename,
 
 /* Alignment check */
 #define IS_ALIGNED(x, a)                (((x) & ((typeof(x))(a) - 1)) == 0)
+#define ALIGN(x, a)		__ALIGN_KERNEL((x), (a))
 
 static inline int is_power_of_2(unsigned long n)
 {
diff --git a/volumes.c b/volumes.c
index a0ebed547faa..14ca3df0efdb 100644
--- a/volumes.c
+++ b/volumes.c
@@ -465,6 +465,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
 	int slot;
 	struct extent_buffer *l;
 	u64 min_search_start;
+	u64 zone_size;
 
 	/*
 	 * We don't want to overwrite the superblock on the drive nor any area
@@ -473,6 +474,13 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
 	 */
 	min_search_start = max(root->fs_info->alloc_start, (u64)SZ_1M);
 	search_start = max(search_start, min_search_start);
+	/*
+	 * For a zoned block device, skip the first zone of the device
+	 * entirely.
+	 */
+	zone_size = device->zone_info.zone_size;
+	search_start = max_t(u64, search_start, zone_size);
+	search_start = btrfs_zone_align(device, search_start);
 
 	path = btrfs_alloc_path();
 	if (!path)
@@ -481,6 +489,7 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
 	max_hole_start = search_start;
 	max_hole_size = 0;
 
+again:
 	if (search_start >= search_end) {
 		ret = -ENOSPC;
 		goto out;
@@ -525,6 +534,13 @@ static int find_free_dev_extent_start(struct btrfs_device *device,
 			goto next;
 
 		if (key.offset > search_start) {
+			if (!btrfs_check_allocatable_zones(device, search_start,
+							   num_bytes)) {
+				search_start += zone_size;
+				btrfs_release_path(path);
+				goto again;
+			}
+
 			hole_size = key.offset - search_start;
 
 			/*
@@ -567,6 +583,13 @@ next:
 	 * search_end may be smaller than search_start.
 	 */
 	if (search_end > search_start) {
+		if (!btrfs_check_allocatable_zones(device, search_start,
+						   num_bytes)) {
+			search_start += zone_size;
+			btrfs_release_path(path);
+			goto again;
+		}
+
 		hole_size = search_end - search_start;
 
 		if (hole_size > max_hole_size) {
@@ -610,6 +633,10 @@ int btrfs_insert_dev_extent(struct btrfs_trans_handle *trans,
 	struct extent_buffer *leaf;
 	struct btrfs_key key;
 
+	/* Check alignment to zone for a zoned block device */
+	ASSERT(device->zone_info.model != ZONED_HOST_MANAGED ||
+	       IS_ALIGNED(start, device->zone_info.zone_size));
+
 	path = btrfs_alloc_path();
 	if (!path)
 		return -ENOMEM;
@@ -1012,17 +1039,13 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	int max_stripes = 0;
 	int min_stripes = 1;
 	int sub_stripes;	/* sub_stripes info for map */
-	int dev_stripes __attribute__((unused));
-				/* stripes per dev */
+	int dev_stripes;	/* stripes per dev */
 	int devs_max;		/* max devs to use */
-	int devs_min __attribute__((unused));
-				/* min devs needed */
+	int devs_min;		/* min devs needed */
 	int devs_increment __attribute__((unused));
 				/* ndevs has to be a multiple of this */
-	int ncopies __attribute__((unused));
-				/* how many copies to data has */
-	int nparity __attribute__((unused));
-				/* number of stripes worth of bytes to
+	int ncopies;		/* how many copies to data has */
+	int nparity;		/* number of stripes worth of bytes to
 				   store parity information */
 	int looped = 0;
 	int ret;
@@ -1030,6 +1053,8 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 	int stripe_len = BTRFS_STRIPE_LEN;
 	struct btrfs_key key;
 	u64 offset;
+	bool hmzoned = info->fs_devices->hmzoned;
+	u64 zone_size = info->fs_devices->zone_size;
 
 	if (list_empty(dev_list)) {
 		return -ENOSPC;
@@ -1116,13 +1141,39 @@ int btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
 				    btrfs_super_stripesize(info->super_copy));
 	}
 
+	if (hmzoned) {
+		calc_size = zone_size;
+		max_chunk_size = round_down(max_chunk_size, zone_size);
+	}
+
 	/* we don't want a chunk larger than 10% of the FS */
 	percent_max = div_factor(btrfs_super_total_bytes(info->super_copy), 1);
 	max_chunk_size = min(percent_max, max_chunk_size);
 
+	if (hmzoned) {
+		int min_num_stripes = devs_min * dev_stripes;
+		int min_data_stripes = (min_num_stripes - nparity) / ncopies;
+		u64 min_chunk_size = min_data_stripes * zone_size;
+
+		max_chunk_size = max(round_down(max_chunk_size,
+						zone_size),
+				     min_chunk_size);
+	}
+
 again:
 	if (chunk_bytes_by_type(type, calc_size, num_stripes, sub_stripes) >
 	    max_chunk_size) {
+		if (hmzoned) {
+			/*
+			 * calc_size is fixed in HMZONED. Reduce
+			 * num_stripes instead.
+			 */
+			num_stripes = max_chunk_size / calc_size;
+			if (num_stripes < min_stripes)
+				return -ENOSPC;
+			goto again;
+		}
+
 		calc_size = max_chunk_size;
 		calc_size /= num_stripes;
 		calc_size /= stripe_len;
@@ -1133,6 +1184,9 @@ again:
 
 	calc_size /= stripe_len;
 	calc_size *= stripe_len;
+
+	ASSERT(!hmzoned || calc_size == zone_size);
+
 	INIT_LIST_HEAD(&private_devs);
 	cur = dev_list->next;
 	index = 0;
@@ -1214,6 +1268,8 @@ again:
 		if (ret < 0)
 			goto out_chunk_map;
 
+		ASSERT(!zone_size || IS_ALIGNED(dev_offset, zone_size));
+
 		device->bytes_used += calc_size;
 		ret = btrfs_update_device(trans, device);
 		if (ret < 0)
diff --git a/volumes.h b/volumes.h
index b5e7a07df5a8..d1326d068ca3 100644
--- a/volumes.h
+++ b/volumes.h
@@ -323,4 +323,11 @@ static inline bool btrfs_dev_is_sequential(struct btrfs_device *device, u64 pos)
 {
 	return zone_is_sequential(&device->zone_info, pos);
 }
+static inline u64 btrfs_zone_align(struct btrfs_device *device, u64 pos)
+{
+	if (device->zone_info.model == ZONED_NONE)
+		return pos;
+
+	return ALIGN(pos, device->zone_info.zone_size);
+}
 #endif
-- 
2.23.0


  parent reply	other threads:[~2019-08-20  4:53 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-08-20  4:52 [PATCH v3 00/15] btrfs-progs: zoned block device support Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 01/15] btrfs-progs: utils: Introduce queue_param helper function Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 02/15] btrfs-progs: introduce raid parameters variables Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 03/15] btrfs-progs: build: Check zoned block device support Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 04/15] btrfs-progs: add new HMZONED feature flag Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 05/15] btrfs-progs: Introduce zone block device helper functions Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 06/15] btrfs-progs: load and check zone information Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 07/15] btrfs-progs: avoid writing super block to sequential zones Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 08/15] btrfs-progs: support discarding zoned device Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 09/15] btrfs-progs: support zero out on zoned block device Naohiro Aota
2019-08-20  4:52 ` Naohiro Aota [this message]
2019-08-20  4:52 ` [PATCH v3 11/15] btrfs-progs: do sequential allocation in HMZONED mode Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 12/15] btrfs-progs: redirty clean extent buffers in seq Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 13/15] btrfs-progs: mkfs: Zoned block device support Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 14/15] btrfs-progs: device-add: support HMZONED device Naohiro Aota
2019-08-20  4:52 ` [PATCH v3 15/15] btrfs-progs: introduce support for device replace " Naohiro Aota

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190820045258.1571640-11-naohiro.aota@wdc.com \
    --to=naohiro.aota@wdc.com \
    --cc=Matias.Bjorling@wdc.com \
    --cc=clm@fb.com \
    --cc=damien.lemoal@wdc.com \
    --cc=dsterba@suse.com \
    --cc=hare@suse.com \
    --cc=josef@toxicpanda.com \
    --cc=jthumshirn@suse.de \
    --cc=linux-btrfs@vger.kernel.org \
    --cc=linux-fsdevel@vger.kernel.org \
    --cc=nborisov@suse.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).