All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2 v2 RFC] btrfs: keep device type in the struct btrfs_device
       [not found] <cover.1657536723.git.anand.jain@oracle.com>
@ 2022-07-11 11:14 ` Anand Jain
  2022-07-11 11:14 ` [PATCH 2/2 v2 RFC] btrfs: create chunk device type aware Anand Jain
  1 sibling, 0 replies; 2+ messages in thread
From: Anand Jain @ 2022-07-11 11:14 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba

This patch adds a member 'dev_type' to the struct btrfs_device to hold
one of the defined device types.

This new member 'dev_type' is in preparation for making data/metadata
chunk allocations based on the device types.

Struct btrfs_device has an existing member 'type' that is staged and
written back to the on-disk format. This patch does not use it, as
in-memory only data is sufficient for the requirement here.

The types defined here are the broad classes of the device types ignoring
the interfaces used.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
v2: Add BTRFS_DEV_TYPE_NR and add device_list_mutex in
    btrfs_init_dev_type

 fs/btrfs/dev-replace.c |  1 +
 fs/btrfs/disk-io.c     |  2 ++
 fs/btrfs/volumes.c     | 38 ++++++++++++++++++++++++++++++++++++++
 fs/btrfs/volumes.h     | 23 +++++++++++++++++++++--
 4 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index f43196a893ca..ff04653eda9d 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -324,6 +324,7 @@ static int btrfs_init_dev_replace_tgtdev(struct btrfs_fs_info *fs_info,
 	device->mode = FMODE_EXCL;
 	device->dev_stats_valid = 1;
 	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
+	device->dev_type = btrfs_get_device_type(device);
 	device->fs_devices = fs_devices;
 
 	ret = btrfs_get_dev_zone_info(device, false);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 70b388de4d66..ceef98267047 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3649,6 +3649,8 @@ int __cold open_ctree(struct super_block *sb, struct btrfs_fs_devices *fs_device
 		goto fail_block_groups;
 	}
 
+	btrfs_init_dev_type(fs_info->fs_devices);
+
 	/*
 	 * If we have a uuid root and we're not being told to rescan we need to
 	 * check the generation here so we can set the
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 2d788a351c1f..b8ab13127caf 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -2570,6 +2570,29 @@ static int btrfs_finish_sprout(struct btrfs_trans_handle *trans)
 	return ret;
 }
 
+enum btrfs_dev_types btrfs_get_device_type(struct btrfs_device *device)
+{
+	bool nonrot = blk_queue_nonrot(bdev_get_queue(device->bdev));
+	bool zoned = bdev_is_zoned(device->bdev);
+
+	if (zoned) {
+		if (nonrot)
+			return BTRFS_DEV_TYPE_ZNS;
+		else
+			return BTRFS_DEV_TYPE_ZONED;
+	}
+
+	if (nonrot) {
+		/* Major 259 is a NVMe device */
+		if (MAJOR(device->devt) == 259)
+			return BTRFS_DEV_TYPE_NVME;
+
+		return BTRFS_DEV_TYPE_NONROT;
+	}
+
+	return BTRFS_DEV_TYPE_ROT;
+}
+
 int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path)
 {
 	struct btrfs_root *root = fs_info->dev_root;
@@ -2662,6 +2685,7 @@ int btrfs_init_new_device(struct btrfs_fs_info *fs_info, const char *device_path
 	device->mode = FMODE_EXCL;
 	device->dev_stats_valid = 1;
 	set_blocksize(device->bdev, BTRFS_BDEV_BLOCKSIZE);
+	device->dev_type = btrfs_get_device_type(device);
 
 	if (seeding_dev) {
 		btrfs_clear_sb_rdonly(sb);
@@ -8293,3 +8317,17 @@ bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical)
 
 	return true;
 }
+
+void btrfs_init_dev_type(struct btrfs_fs_devices *fs_devices)
+{
+	struct btrfs_device *device;
+
+	mutex_lock(&fs_devices->device_list_mutex);
+	list_for_each_entry(device, &fs_devices->devices, dev_list) {
+		if (test_bit(BTRFS_DEV_STATE_MISSING, &device->dev_state))
+			continue;
+
+		device->dev_type = btrfs_get_device_type(device);
+	}
+	mutex_unlock(&fs_devices->device_list_mutex);
+}
diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h
index 9537d82bb7a2..853c7a2e8960 100644
--- a/fs/btrfs/volumes.h
+++ b/fs/btrfs/volumes.h
@@ -86,6 +86,16 @@ struct btrfs_io_geometry {
 #define BTRFS_DEV_STATE_FLUSH_SENT	(4)
 #define BTRFS_DEV_STATE_NO_READA	(5)
 
+/* The device class type list. */
+enum btrfs_dev_types {
+	BTRFS_DEV_TYPE_NVME = 1,
+	BTRFS_DEV_TYPE_NONROT,
+	BTRFS_DEV_TYPE_ZNS,
+	BTRFS_DEV_TYPE_ROT,
+	BTRFS_DEV_TYPE_ZONED,
+	BTRFS_DEV_TYPE_NR
+};
+
 struct btrfs_zoned_device_info;
 
 struct btrfs_device {
@@ -135,9 +145,17 @@ struct btrfs_device {
 
 	/* optimal io width for this device */
 	u32 io_width;
-	/* type and info about this device */
+
+	/* Type and info about this device. On-disk (currently unused) */
 	u64 type;
 
+	/*
+	 * Device type. In memory only. May consider merging with the member
+	 * 'type' above at some point. Possibly, when we want to support
+	 * user-defined devid-based chunk allocation.
+	 */
+	enum btrfs_dev_types dev_type;
+
 	/* minimal io size for this device */
 	u32 sector_size;
 
@@ -714,5 +732,6 @@ int btrfs_bg_type_to_factor(u64 flags);
 const char *btrfs_bg_type_to_raid_name(u64 flags);
 int btrfs_verify_dev_extents(struct btrfs_fs_info *fs_info);
 bool btrfs_repair_one_zone(struct btrfs_fs_info *fs_info, u64 logical);
-
+enum btrfs_dev_types btrfs_get_device_type(struct btrfs_device *device);
+void btrfs_init_dev_type(struct btrfs_fs_devices *fs_devices);
 #endif
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

* [PATCH 2/2 v2 RFC] btrfs: create chunk device type aware
       [not found] <cover.1657536723.git.anand.jain@oracle.com>
  2022-07-11 11:14 ` [PATCH 1/2 v2 RFC] btrfs: keep device type in the struct btrfs_device Anand Jain
@ 2022-07-11 11:14 ` Anand Jain
  1 sibling, 0 replies; 2+ messages in thread
From: Anand Jain @ 2022-07-11 11:14 UTC (permalink / raw)
  To: linux-btrfs; +Cc: dsterba

The mixed device-types use case prefers that data chunks are allocated on
the higher-latency device type and metadata chunks on the faster
(lower-latency) device type when possible.

As of now, in the function gather_device_info() called from
btrfs_create_chunk(), we sort the devices based on unallocated space only.
After this patch, the function will also check for mixed device types.

First, it sorts the devices based on the latency. That is, sort
ascending if the allocation type is metadata and reverse-sort if the
allocation type is data. Next, within a device type, sort the devices by
their free space.

enum btrfs_dev_types values are in the ascending order of latency.
It is a simple static list that helps in the most common cases. User
options, if needed, can be added later.

When one of the device types runs out of free space, its devices will not
make it to the available device list. Allocation will then continue with
the free space on the next preferred device type. At some point later, we
can make this behaviour configurable by a user option too, to fail with
ENOSPC or to warn() instead.

The advantage of this method is that the distribution of data/metadata
allocations based on the device type happens automatically, for better
performance, without any manual configuration.

Signed-off-by: Anand Jain <anand.jain@oracle.com>
---
v2: Initialize btrfs_dev_types array btrfs_devices_by_latency to hold
     latency value. (Kdave).
    Sort devices by type (latency) and then by available space. (Kdave).

 fs/btrfs/volumes.c | 89 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 84 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index b8ab13127caf..838ebf62e517 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5027,6 +5027,47 @@ static int btrfs_add_system_chunk(struct btrfs_fs_info *fs_info,
 	return 0;
 }
 
+/* The most preferred type for Metadata is at the top. */
+enum btrfs_dev_types btrfs_devices_by_latency[] = {
+	BTRFS_DEV_TYPE_NVME,
+	BTRFS_DEV_TYPE_NONROT,
+	BTRFS_DEV_TYPE_ZNS,
+	BTRFS_DEV_TYPE_ROT,
+	BTRFS_DEV_TYPE_ZONED,
+};
+
+static int btrfs_dev_type_to_latency(enum btrfs_dev_types type)
+{
+	int p;
+
+	for (p = 0; p < BTRFS_DEV_TYPE_NR; p++) {
+		if (btrfs_devices_by_latency[p] == type)
+			return p;
+	}
+
+	return -EINVAL;
+}
+
+/* Sort the devices in the ascending order of their latency. */
+static int btrfs_device_latency_asc(const void *a, const void *b)
+{
+	const struct btrfs_device_info *di_a = a;
+	const struct btrfs_device_info *di_b = b;
+	int latency_a = btrfs_dev_type_to_latency(di_a->dev->dev_type);
+	int latency_b = btrfs_dev_type_to_latency(di_b->dev->dev_type);
+
+	if (latency_a > latency_b)
+		return 1;
+	if (latency_a < latency_b)
+		return -1;
+	return 0;
+}
+
+static int btrfs_device_latency_des(const void *a, const void *b)
+{
+	return -btrfs_device_latency_asc(a, b);
+}
+
 /*
  * sort the devices in descending order by max_avail, total_avail
  */
@@ -5185,6 +5226,7 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
 	u64 dev_extent_want = ctl->max_stripe_size * ctl->dev_stripes;
 	int ret;
 	int ndevs = 0;
+	unsigned int mixed_type = 0;
 	u64 max_avail;
 	u64 dev_offset;
 
@@ -5239,15 +5281,52 @@ static int gather_device_info(struct btrfs_fs_devices *fs_devices,
 		devices_info[ndevs].max_avail = max_avail;
 		devices_info[ndevs].total_avail = total_avail;
 		devices_info[ndevs].dev = device;
+		mixed_type |= 1 << device->dev_type;
 		++ndevs;
 	}
 	ctl->ndevs = ndevs;
 
-	/*
-	 * now sort the devices by hole size / available space
-	 */
-	sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
-	     btrfs_cmp_device_info, NULL);
+	/* Check if the gathered devices have mixed device types. */
+	if (mixed_type && !is_power_of_2(mixed_type)) {
+		u64 cur_index;
+		u64 start_index;
+		int start_type;
+
+		/*
+		 * Sort devices by their type. Ascending for metadata and descending
+		 * for the data chunks.
+		 */
+		sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
+		     ctl->type & BTRFS_BLOCK_GROUP_DATA ?
+		     btrfs_device_latency_des : btrfs_device_latency_asc,
+		     NULL);
+
+		/* Now sort devices in each type by its available space */
+		start_index = 0;
+		start_type = devices_info[0].dev->dev_type;
+		for (cur_index = 1; cur_index < ndevs; cur_index++) {
+			int cur_type = devices_info[cur_index].dev->dev_type;
+
+			if (cur_type == start_type)
+				continue;
+
+			sort(&devices_info[start_index],
+			     cur_index - start_index,
+			     sizeof(struct btrfs_device_info),
+			     btrfs_cmp_device_info, NULL);
+
+			start_index = cur_index;
+			start_type = cur_type;
+		}
+		if (cur_index - start_index > 1)
+			sort(&devices_info[start_index], cur_index - start_index,
+			     sizeof(struct btrfs_device_info),
+			     btrfs_cmp_device_info, NULL);
+	} else {
+		/* Sort the devices by hole size / available space */
+		sort(devices_info, ndevs, sizeof(struct btrfs_device_info),
+		     btrfs_cmp_device_info, NULL);
+	}
 
 	return 0;
 }
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 2+ messages in thread

end of thread, other threads:[~2022-07-11 11:32 UTC | newest]

Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
     [not found] <cover.1657536723.git.anand.jain@oracle.com>
2022-07-11 11:14 ` [PATCH 1/2 v2 RFC] btrfs: keep device type in the struct btrfs_device Anand Jain
2022-07-11 11:14 ` [PATCH 2/2 v2 RFC] btrfs: create chunk device type aware Anand Jain

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.